def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0,
             depth_list=2, expand_ratio_list=0.25, width_mult_list=1.0):
    self.depth_list = val2list(depth_list)
    self.expand_ratio_list = val2list(expand_ratio_list)
    self.width_mult_list = val2list(width_mult_list)
    # sort
    self.depth_list.sort()
    self.expand_ratio_list.sort()
    self.width_mult_list.sort()

    input_channel = [
        make_divisible(64 * width_mult, MyNetwork.CHANNEL_DIVISIBLE) for width_mult in self.width_mult_list
    ]
    mid_input_channel = [
        make_divisible(channel // 2, MyNetwork.CHANNEL_DIVISIBLE) for channel in input_channel
    ]

    stage_width_list = ResNets.STAGE_WIDTH_LIST.copy()
    for i, width in enumerate(stage_width_list):
        stage_width_list[i] = [
            make_divisible(width * width_mult, MyNetwork.CHANNEL_DIVISIBLE) for width_mult in self.width_mult_list
        ]

    n_block_list = [base_depth + max(self.depth_list) for base_depth in ResNets.BASE_DEPTH_LIST]
    stride_list = [1, 2, 2, 2]

    # build input stem
    input_stem = [
        DynamicConvLayer(val2list(3), mid_input_channel, 3, stride=2, use_bn=True, act_func='relu'),
        ResidualBlock(
            DynamicConvLayer(mid_input_channel, mid_input_channel, 3, stride=1, use_bn=True, act_func='relu'),
            IdentityLayer(mid_input_channel, mid_input_channel)
        ),
        DynamicConvLayer(mid_input_channel, input_channel, 3, stride=1, use_bn=True, act_func='relu')
    ]

    # blocks
    blocks = []
    for d, width, s in zip(n_block_list, stage_width_list, stride_list):
        for i in range(d):
            stride = s if i == 0 else 1
            bottleneck_block = DynamicResNetBottleneckBlock(
                input_channel, width, expand_ratio_list=self.expand_ratio_list,
                kernel_size=3, stride=stride, act_func='relu', downsample_mode='avgpool_conv',
            )
            blocks.append(bottleneck_block)
            input_channel = width
    # classifier
    classifier = DynamicLinearLayer(input_channel, n_classes, dropout_rate=dropout_rate)

    super(OFAResNets, self).__init__(input_stem, blocks, classifier)

    # set bn param
    self.set_bn_param(*bn_param)

    # runtime_depth
    self.input_stem_skipping = 0
    self.runtime_depth = [0] * len(n_block_list)
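# A minimal construction sketch for the dynamic ResNet supernet above.
# The elastic-dimension values below are examples only (any sorted lists work):
net = OFAResNets(
    n_classes=1000,
    bn_param=(0.1, 1e-5),
    dropout_rate=0.1,
    depth_list=[0, 1, 2],                 # per-stage offsets over ResNets.BASE_DEPTH_LIST
    expand_ratio_list=[0.2, 0.25, 0.35],  # bottleneck expand ratios
    width_mult_list=[0.65, 0.8, 1.0],     # stem/stage width multipliers
)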
def re_organize_middle_weights(self, expand_ratio_stage=0):
    # conv3 -> conv2: score conv2's output channels by the L1 norm of the
    # corresponding conv3 input weights, then permute both convs (and
    # conv2's BN) consistently so the most important channels come first
    importance = torch.sum(torch.abs(self.conv3.conv.conv.weight.data), dim=(0, 2, 3))
    if isinstance(self.conv2.bn, DynamicGroupNorm):
        channel_per_group = self.conv2.bn.channel_per_group
        importance_chunks = torch.split(importance, channel_per_group)
        for chunk in importance_chunks:
            chunk.data.fill_(torch.mean(chunk))
        importance = torch.cat(importance_chunks, dim=0)
    if expand_ratio_stage > 0:
        sorted_expand_list = copy.deepcopy(self.expand_ratio_list)
        sorted_expand_list.sort(reverse=True)
        target_width_list = [
            make_divisible(round(max(self.out_channel_list) * expand), MyNetwork.CHANNEL_DIVISIBLE)
            for expand in sorted_expand_list
        ]
        right = len(importance)
        base = -len(target_width_list) * 1e5
        for i in range(expand_ratio_stage + 1):
            left = target_width_list[i]
            importance[left:right] += base
            base += 1e5
            right = left

    sorted_importance, sorted_idx = torch.sort(importance, dim=0, descending=True)
    self.conv3.conv.conv.weight.data = torch.index_select(self.conv3.conv.conv.weight.data, 1, sorted_idx)
    adjust_bn_according_to_idx(self.conv2.bn.bn, sorted_idx)
    self.conv2.conv.conv.weight.data = torch.index_select(self.conv2.conv.conv.weight.data, 0, sorted_idx)

    # conv2 -> conv1: repeat the same reordering one conv earlier
    importance = torch.sum(torch.abs(self.conv2.conv.conv.weight.data), dim=(0, 2, 3))
    if isinstance(self.conv1.bn, DynamicGroupNorm):
        channel_per_group = self.conv1.bn.channel_per_group
        importance_chunks = torch.split(importance, channel_per_group)
        for chunk in importance_chunks:
            chunk.data.fill_(torch.mean(chunk))
        importance = torch.cat(importance_chunks, dim=0)
    if expand_ratio_stage > 0:
        sorted_expand_list = copy.deepcopy(self.expand_ratio_list)
        sorted_expand_list.sort(reverse=True)
        target_width_list = [
            make_divisible(round(max(self.out_channel_list) * expand), MyNetwork.CHANNEL_DIVISIBLE)
            for expand in sorted_expand_list
        ]
        right = len(importance)
        base = -len(target_width_list) * 1e5
        for i in range(expand_ratio_stage + 1):
            left = target_width_list[i]
            importance[left:right] += base
            base += 1e5
            right = left

    sorted_importance, sorted_idx = torch.sort(importance, dim=0, descending=True)
    self.conv2.conv.conv.weight.data = torch.index_select(self.conv2.conv.conv.weight.data, 1, sorted_idx)
    adjust_bn_according_to_idx(self.conv1.bn.bn, sorted_idx)
    self.conv1.conv.conv.weight.data = torch.index_select(self.conv1.conv.conv.weight.data, 0, sorted_idx)

    return None
def re_organize_middle_weights(self, expand_ratio_stage=0):
    importance = torch.sum(torch.abs(self.point_linear.conv.conv.weight.data), dim=(0, 2, 3))
    if isinstance(self.depth_conv.bn, DynamicGroupNorm):
        channel_per_group = self.depth_conv.bn.channel_per_group
        importance_chunks = torch.split(importance, channel_per_group)
        for chunk in importance_chunks:
            chunk.data.fill_(torch.mean(chunk))
        importance = torch.cat(importance_chunks, dim=0)
    if expand_ratio_stage > 0:
        sorted_expand_list = copy.deepcopy(self.expand_ratio_list)
        sorted_expand_list.sort(reverse=True)
        target_width_list = [
            make_divisible(round(max(self.in_channel_list) * expand), MyNetwork.CHANNEL_DIVISIBLE)
            for expand in sorted_expand_list
        ]

        right = len(importance)
        base = -len(target_width_list) * 1e5
        for i in range(expand_ratio_stage + 1):
            left = target_width_list[i]
            importance[left:right] += base
            base += 1e5
            right = left

    sorted_importance, sorted_idx = torch.sort(importance, dim=0, descending=True)
    self.point_linear.conv.conv.weight.data = torch.index_select(
        self.point_linear.conv.conv.weight.data, 1, sorted_idx
    )

    adjust_bn_according_to_idx(self.depth_conv.bn.bn, sorted_idx)
    self.depth_conv.conv.conv.weight.data = torch.index_select(
        self.depth_conv.conv.conv.weight.data, 0, sorted_idx
    )

    if self.use_se:
        # se expand: output dim 0 reorganize
        se_expand = self.depth_conv.se.fc.expand
        se_expand.weight.data = torch.index_select(se_expand.weight.data, 0, sorted_idx)
        se_expand.bias.data = torch.index_select(se_expand.bias.data, 0, sorted_idx)
        # se reduce: input dim 1 reorganize
        se_reduce = self.depth_conv.se.fc.reduce
        se_reduce.weight.data = torch.index_select(se_reduce.weight.data, 1, sorted_idx)
        # middle weight reorganize
        se_importance = torch.sum(torch.abs(se_expand.weight.data), dim=(0, 2, 3))
        se_importance, se_idx = torch.sort(se_importance, dim=0, descending=True)

        se_expand.weight.data = torch.index_select(se_expand.weight.data, 1, se_idx)
        se_reduce.weight.data = torch.index_select(se_reduce.weight.data, 0, se_idx)
        se_reduce.bias.data = torch.index_select(se_reduce.bias.data, 0, se_idx)

    if self.inverted_bottleneck is not None:
        adjust_bn_according_to_idx(self.inverted_bottleneck.bn.bn, sorted_idx)
        self.inverted_bottleneck.conv.conv.weight.data = torch.index_select(
            self.inverted_bottleneck.conv.conv.weight.data, 0, sorted_idx
        )
        return None
    else:
        return sorted_idx
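# Both re_organize_middle_weights variants above rest on one primitive:
# score each middle channel by the L1 norm of the downstream pointwise
# conv's weights over it, then apply the same permutation to the producer's
# output channels (dim 0) and the consumer's input channels (dim 1).
# A self-contained toy illustration:
import torch

conv_a = torch.randn(6, 4, 3, 3)   # producer: 6 output channels
conv_b = torch.randn(8, 6, 1, 1)   # consumer: 6 input channels

importance = conv_b.abs().sum(dim=(0, 2, 3))     # per-channel L1 score
_, idx = torch.sort(importance, descending=True)

# permuting both sides consistently leaves the composed function unchanged,
# but the most important channels now come first and survive width slicing
conv_a = torch.index_select(conv_a, 0, idx)
conv_b = torch.index_select(conv_b, 1, idx)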
def __init__(self, in_channel_list, out_channel_list,
             kernel_size_list=3, expand_ratio_list=6, stride=1, act_func='relu6', use_se=False):
    super(DynamicMBConvLayer, self).__init__()

    self.in_channel_list = in_channel_list
    self.out_channel_list = out_channel_list

    self.kernel_size_list = val2list(kernel_size_list)
    self.expand_ratio_list = val2list(expand_ratio_list)

    self.stride = stride
    self.act_func = act_func
    self.use_se = use_se

    # build modules
    max_middle_channel = make_divisible(
        round(max(self.in_channel_list) * max(self.expand_ratio_list)), MyNetwork.CHANNEL_DIVISIBLE)
    if max(self.expand_ratio_list) == 1:
        self.inverted_bottleneck = None
    else:
        self.inverted_bottleneck = nn.Sequential(OrderedDict([
            ('conv', DynamicConv2d(max(self.in_channel_list), max_middle_channel)),
            ('bn', DynamicBatchNorm2d(max_middle_channel)),
            ('act', build_activation(self.act_func)),
        ]))

    self.depth_conv = nn.Sequential(OrderedDict([
        ('conv', DynamicSeparableConv2d(max_middle_channel, self.kernel_size_list, self.stride)),
        ('bn', DynamicBatchNorm2d(max_middle_channel)),
        ('act', build_activation(self.act_func))
    ]))
    if self.use_se:
        self.depth_conv.add_module('se', DynamicSE(max_middle_channel))

    self.point_linear = nn.Sequential(OrderedDict([
        ('conv', DynamicConv2d(max_middle_channel, max(self.out_channel_list))),
        ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
    ]))

    self.active_kernel_size = max(self.kernel_size_list)
    self.active_expand_ratio = max(self.expand_ratio_list)
    self.active_out_channel = max(self.out_channel_list)
def __init__(self, in_channel_list, out_channel_list, expand_ratio_list=0.25,
             kernel_size=3, stride=1, act_func='relu', downsample_mode='avgpool_conv'):
    super(DynamicResNetBottleneckBlock, self).__init__()

    self.in_channel_list = in_channel_list
    self.out_channel_list = out_channel_list
    self.expand_ratio_list = val2list(expand_ratio_list)

    self.kernel_size = kernel_size
    self.stride = stride
    self.act_func = act_func
    self.downsample_mode = downsample_mode

    # build modules
    max_middle_channel = make_divisible(
        round(max(self.out_channel_list) * max(self.expand_ratio_list)), MyNetwork.CHANNEL_DIVISIBLE)

    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', DynamicConv2d(max(self.in_channel_list), max_middle_channel)),
        ('bn', DynamicBatchNorm2d(max_middle_channel)),
        ('act', build_activation(self.act_func, inplace=True)),
    ]))

    self.conv2 = nn.Sequential(OrderedDict([
        ('conv', DynamicConv2d(max_middle_channel, max_middle_channel, kernel_size, stride)),
        ('bn', DynamicBatchNorm2d(max_middle_channel)),
        ('act', build_activation(self.act_func, inplace=True))
    ]))

    self.conv3 = nn.Sequential(OrderedDict([
        ('conv', DynamicConv2d(max_middle_channel, max(self.out_channel_list))),
        ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
    ]))

    if self.stride == 1 and self.in_channel_list == self.out_channel_list:
        self.downsample = IdentityLayer(max(self.in_channel_list), max(self.out_channel_list))
    elif self.downsample_mode == 'conv':
        self.downsample = nn.Sequential(OrderedDict([
            ('conv', DynamicConv2d(max(self.in_channel_list), max(self.out_channel_list), stride=stride)),
            ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
        ]))
    elif self.downsample_mode == 'avgpool_conv':
        self.downsample = nn.Sequential(OrderedDict([
            ('avg_pool', nn.AvgPool2d(kernel_size=stride, stride=stride, padding=0, ceil_mode=True)),
            ('conv', DynamicConv2d(max(self.in_channel_list), max(self.out_channel_list))),
            ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
        ]))
    else:
        raise NotImplementedError

    self.final_act = build_activation(self.act_func, inplace=True)

    self.active_expand_ratio = max(self.expand_ratio_list)
    self.active_out_channel = max(self.out_channel_list)
def count_flops_given_config(net_config, image_size=224):
    flops = 0
    # first conv
    flops += count_conv_flop((image_size + 1) // 2, 3, net_config['first_conv']['out_channels'], 3, 1)
    # blocks
    fsize = (image_size + 1) // 2
    for block in net_config['blocks']:
        mb_conv = block['mobile_inverted_conv'] if 'mobile_inverted_conv' in block else block['conv']
        if mb_conv is None:
            continue
        out_fz = int((fsize - 1) / mb_conv['stride'] + 1)
        if mb_conv['mid_channels'] is None:
            mb_conv['mid_channels'] = round(mb_conv['in_channels'] * mb_conv['expand_ratio'])
        if mb_conv['expand_ratio'] != 1:
            # inverted bottleneck
            flops += count_conv_flop(fsize, mb_conv['in_channels'], mb_conv['mid_channels'], 1, 1)
        # depth conv
        flops += count_conv_flop(out_fz, mb_conv['mid_channels'], mb_conv['mid_channels'],
                                 mb_conv['kernel_size'], mb_conv['mid_channels'])
        if mb_conv['use_se']:
            # SE layer
            se_mid = make_divisible(mb_conv['mid_channels'] // 4, divisor=MyNetwork.CHANNEL_DIVISIBLE)
            flops += count_conv_flop(1, mb_conv['mid_channels'], se_mid, 1, 1)
            flops += count_conv_flop(1, se_mid, mb_conv['mid_channels'], 1, 1)
        # point linear
        flops += count_conv_flop(out_fz, mb_conv['mid_channels'], mb_conv['out_channels'], 1, 1)
        fsize = out_fz
    # final expand layer
    flops += count_conv_flop(fsize, net_config['final_expand_layer']['in_channels'],
                             net_config['final_expand_layer']['out_channels'], 1, 1)
    # feature mix layer
    flops += count_conv_flop(1, net_config['feature_mix_layer']['in_channels'],
                             net_config['feature_mix_layer']['out_channels'], 1, 1)
    # classifier
    flops += count_conv_flop(1, net_config['classifier']['in_features'],
                             net_config['classifier']['out_features'], 1, 1)
    return flops / 1e6  # MFLOPs
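# `count_conv_flop` is not defined in this excerpt. From its call sites
# (out_size, in_channels, out_channels, kernel_size, groups) it presumably
# computes the standard conv multiply-accumulate count; a minimal sketch
# under that assumption:
def count_conv_flop(out_size, in_channels, out_channels, kernel_size, groups):
    # MACs of a conv producing an out_size x out_size feature map
    return out_size * out_size * (in_channels // groups) * out_channels * kernel_size * kernel_size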
def forward(self, x):
    in_channel = x.size(1)

    # configure the active widths / kernel size before running the static forward
    if self.inverted_bottleneck is not None:
        self.inverted_bottleneck.conv.active_out_channel = \
            make_divisible(round(in_channel * self.active_expand_ratio), MyNetwork.CHANNEL_DIVISIBLE)

    self.depth_conv.conv.active_kernel_size = self.active_kernel_size
    self.point_linear.conv.active_out_channel = self.active_out_channel

    if self.inverted_bottleneck is not None:
        x = self.inverted_bottleneck(x)
    x = self.depth_conv(x)
    x = self.point_linear(x)
    return x
def count_flops_given_config(net_config, image_size=224):
    flops = 0
    # input stem
    for layer_config in net_config['input_stem']:
        if layer_config['name'] != 'ConvLayer':
            layer_config = layer_config['conv']
        in_channel = layer_config['in_channels']
        out_channel = layer_config['out_channels']
        out_image_size = int((image_size - 1) / layer_config['stride'] + 1)
        flops += count_conv_flop(out_image_size, in_channel, out_channel,
                                 layer_config['kernel_size'], layer_config.get('groups', 1))
        image_size = out_image_size
    # max pooling
    image_size = int((image_size - 1) / 2 + 1)
    # ResNetBottleneckBlocks
    for block_config in net_config['blocks']:
        in_channel = block_config['in_channels']
        out_channel = block_config['out_channels']

        out_image_size = int((image_size - 1) / block_config['stride'] + 1)
        mid_channel = block_config['mid_channels'] if block_config['mid_channels'] is not None \
            else round(out_channel * block_config['expand_ratio'])
        mid_channel = make_divisible(mid_channel, MyNetwork.CHANNEL_DIVISIBLE)
        # conv1
        flops += count_conv_flop(image_size, in_channel, mid_channel, 1, 1)
        # conv2
        flops += count_conv_flop(out_image_size, mid_channel, mid_channel,
                                 block_config['kernel_size'], block_config['groups'])
        # conv3
        flops += count_conv_flop(out_image_size, mid_channel, out_channel, 1, 1)
        # downsample
        if block_config['stride'] == 1 and in_channel == out_channel:
            pass
        else:
            flops += count_conv_flop(out_image_size, in_channel, out_channel, 1, 1)
        image_size = out_image_size
    # final classifier
    flops += count_conv_flop(1, net_config['classifier']['in_features'],
                             net_config['classifier']['out_features'], 1, 1)
    return flops / 1e6  # MFLOPs
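# Hypothetical usage, assuming (as this codebase's MyNetwork convention
# suggests) that networks expose their architecture as a `config` dict:
net = ResNet50D(n_classes=1000, width_mult=1.0)   # ResNet50D is defined later in this section
mflops = count_flops_given_config(net.config, image_size=224)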
def forward(self, x, groups=None):
    in_channel = x.size(1)
    num_mid = make_divisible(in_channel // self.reduction, divisor=MyNetwork.CHANNEL_DIVISIBLE)

    y = x.mean(3, keepdim=True).mean(2, keepdim=True)
    # reduce
    reduce_filter = self.get_active_reduce_weight(num_mid, in_channel, groups=groups).contiguous()
    reduce_bias = self.get_active_reduce_bias(num_mid)
    y = F.conv2d(y, reduce_filter, reduce_bias, 1, 0, 1, 1)
    # relu
    y = self.fc.relu(y)
    # expand
    expand_filter = self.get_active_expand_weight(num_mid, in_channel, groups=groups).contiguous()
    expand_bias = self.get_active_expand_bias(in_channel, groups=groups)
    y = F.conv2d(y, expand_filter, expand_bias, 1, 0, 1, 1)
    # hard sigmoid
    y = self.fc.h_sigmoid(y)

    return x * y
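# A toy shape check for the dynamic SE module above. Hedged sketch: it
# assumes the codebase's DynamicSE is built with its maximum channel count
# and slices its FC weights down to the runtime width, as the
# get_active_* calls suggest.
se = DynamicSE(64)                # supports any active width up to 64
x = torch.randn(2, 48, 7, 7)      # runtime width 48 <= 64
y = se(x)                         # per-channel gates, broadcast over H x W
assert y.shape == x.shape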
def get_active_subnet(self, in_channel, preserve_weight=True):
    # build the new layer
    sub_layer = set_layer_from_config(self.get_active_subnet_config(in_channel))
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer

    middle_channel = self.active_middle_channel(in_channel)
    # copy weight from current layer
    if sub_layer.inverted_bottleneck is not None:
        sub_layer.inverted_bottleneck.conv.weight.data.copy_(
            self.inverted_bottleneck.conv.get_active_filter(middle_channel, in_channel).data,
        )
        copy_bn(sub_layer.inverted_bottleneck.bn, self.inverted_bottleneck.bn.bn)

    sub_layer.depth_conv.conv.weight.data.copy_(
        self.depth_conv.conv.get_active_filter(middle_channel, self.active_kernel_size).data)
    copy_bn(sub_layer.depth_conv.bn, self.depth_conv.bn.bn)

    if self.use_se:
        se_mid = make_divisible(middle_channel // SEModule.REDUCTION, divisor=MyNetwork.CHANNEL_DIVISIBLE)
        sub_layer.depth_conv.se.fc.reduce.weight.data.copy_(
            self.depth_conv.se.get_active_reduce_weight(se_mid, middle_channel).data)
        sub_layer.depth_conv.se.fc.reduce.bias.data.copy_(
            self.depth_conv.se.get_active_reduce_bias(se_mid).data)

        sub_layer.depth_conv.se.fc.expand.weight.data.copy_(
            self.depth_conv.se.get_active_expand_weight(se_mid, middle_channel).data)
        sub_layer.depth_conv.se.fc.expand.bias.data.copy_(
            self.depth_conv.se.get_active_expand_bias(middle_channel).data)

    sub_layer.point_linear.conv.weight.data.copy_(
        self.point_linear.conv.get_active_filter(self.active_out_channel, middle_channel).data)
    copy_bn(sub_layer.point_linear.bn, self.point_linear.bn.bn)

    return sub_layer
def __init__(self, n_classes=1000, width_mult=1.0, bn_param=(0.1, 1e-3), dropout_rate=0.2,
             ks=None, expand_ratio=None, depth_param=None, stage_width_list=None):
    ks = 3 if ks is None else ks
    expand_ratio = 6 if expand_ratio is None else expand_ratio

    input_channel = 32
    last_channel = 1280

    input_channel = make_divisible(input_channel * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
    last_channel = make_divisible(last_channel * width_mult, MyNetwork.CHANNEL_DIVISIBLE) \
        if width_mult > 1.0 else last_channel

    inverted_residual_setting = [
        # t, c, n, s
        [1, 16, 1, 1],
        [expand_ratio, 24, 2, 2],
        [expand_ratio, 32, 3, 2],
        [expand_ratio, 64, 4, 2],
        [expand_ratio, 96, 3, 1],
        [expand_ratio, 160, 3, 2],
        [expand_ratio, 320, 1, 1],
    ]

    if depth_param is not None:
        assert isinstance(depth_param, int)
        for i in range(1, len(inverted_residual_setting) - 1):
            inverted_residual_setting[i][2] = depth_param

    if stage_width_list is not None:
        for i in range(len(inverted_residual_setting)):
            inverted_residual_setting[i][1] = stage_width_list[i]

    ks = val2list(ks, sum([n for _, _, n, _ in inverted_residual_setting]) - 1)
    _pt = 0

    # first conv layer
    first_conv = ConvLayer(
        3, input_channel, kernel_size=3, stride=2, use_bn=True, act_func='relu6', ops_order='weight_bn_act'
    )
    # inverted residual blocks
    blocks = []
    for t, c, n, s in inverted_residual_setting:
        output_channel = make_divisible(c * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
        for i in range(n):
            if i == 0:
                stride = s
            else:
                stride = 1
            if t == 1:
                kernel_size = 3
            else:
                kernel_size = ks[_pt]
                _pt += 1
            mobile_inverted_conv = MBConvLayer(
                in_channels=input_channel, out_channels=output_channel,
                kernel_size=kernel_size, stride=stride, expand_ratio=t,
            )
            if stride == 1:
                if input_channel == output_channel:
                    shortcut = IdentityLayer(input_channel, input_channel)
                else:
                    shortcut = None
            else:
                shortcut = None
            blocks.append(ResidualBlock(mobile_inverted_conv, shortcut))
            input_channel = output_channel
    # 1x1_conv before global average pooling
    feature_mix_layer = ConvLayer(
        input_channel, last_channel, kernel_size=1, use_bn=True, act_func='relu6', ops_order='weight_bn_act',
    )

    classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

    super(MobileNetV2, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(*bn_param)
def __init__(self, n_classes=1000, width_mult=1.0, bn_param=(0.1, 1e-5), dropout_rate=0,
             expand_ratio=None, depth_param=None):
    expand_ratio = 0.25 if expand_ratio is None else expand_ratio

    input_channel = make_divisible(64 * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
    mid_input_channel = make_divisible(input_channel // 2, MyNetwork.CHANNEL_DIVISIBLE)

    stage_width_list = ResNets.STAGE_WIDTH_LIST.copy()
    for i, width in enumerate(stage_width_list):
        stage_width_list[i] = make_divisible(width * width_mult, MyNetwork.CHANNEL_DIVISIBLE)

    depth_list = [3, 4, 6, 3]
    if depth_param is not None:
        for i, depth in enumerate(ResNets.BASE_DEPTH_LIST):
            depth_list[i] = depth + depth_param

    stride_list = [1, 2, 2, 2]

    # build input stem
    input_stem = [
        ConvLayer(3, mid_input_channel, 3, stride=2, use_bn=True, act_func='relu'),
        ResidualBlock(
            ConvLayer(mid_input_channel, mid_input_channel, 3, stride=1, use_bn=True, act_func='relu'),
            IdentityLayer(mid_input_channel, mid_input_channel)),
        ConvLayer(mid_input_channel, input_channel, 3, stride=1, use_bn=True, act_func='relu')
    ]

    # blocks
    blocks = []
    for d, width, s in zip(depth_list, stage_width_list, stride_list):
        for i in range(d):
            stride = s if i == 0 else 1
            bottleneck_block = ResNetBottleneckBlock(
                input_channel, width, kernel_size=3, stride=stride, expand_ratio=expand_ratio,
                act_func='relu', downsample_mode='avgpool_conv',
            )
            blocks.append(bottleneck_block)
            input_channel = width
    # classifier
    classifier = LinearLayer(input_channel, n_classes, dropout_rate=dropout_rate)

    super(ResNet50D, self).__init__(input_stem, blocks, classifier)

    # set bn param
    self.set_bn_param(*bn_param)
def __init__(self, main_branch, in_channels, out_channels,
             expand=1.0, kernel_size=3, act_func='relu', n_groups=2,
             downsample_ratio=2, upsample_type='bilinear', stride=1):
    super(LiteResidualModule, self).__init__()

    self.main_branch = main_branch

    self.lite_residual_config = {
        'in_channels': in_channels,
        'out_channels': out_channels,
        'expand': expand,
        'kernel_size': kernel_size,
        'act_func': act_func,
        'n_groups': n_groups,
        'downsample_ratio': downsample_ratio,
        'upsample_type': upsample_type,
        'stride': stride,
    }

    kernel_size = 1 if downsample_ratio is None else kernel_size

    padding = get_same_padding(kernel_size)
    if downsample_ratio is None:
        pooling = MyGlobalAvgPool2d()
    else:
        pooling = nn.AvgPool2d(downsample_ratio, downsample_ratio, 0)
    num_mid = make_divisible(int(in_channels * expand), divisor=MyNetwork.CHANNEL_DIVISIBLE)
    self.lite_residual = nn.Sequential(OrderedDict({
        'pooling': pooling,
        'conv1': nn.Conv2d(in_channels, num_mid, kernel_size, stride, padding, groups=n_groups, bias=False),
        'bn1': nn.BatchNorm2d(num_mid),
        'act': build_activation(act_func),
        'conv2': nn.Conv2d(num_mid, out_channels, 1, 1, 0, bias=False),
        'final_bn': nn.BatchNorm2d(out_channels),
    }))

    # initialize
    init_models(self.lite_residual)
    # zero-init the final BN scale so the lite residual branch starts as an
    # identity mapping and does not perturb the pretrained main branch
    self.lite_residual.final_bn.weight.data.zero_()
import numpy as np
import torch
# `model` and `make_divisible` are assumed to come from the surrounding
# script / codebase, as in the rest of this section.

# for i, stage in enumerate(stages):
#     depth = sample['d'][i]
#     kernels = sample['ks'][4*i: 4*(i+1)]
#     expand_ratios = sample['e'][4*i: 4*(i+1)]
out_channels = [model.blocks[i].mobile_inverted_conv.point_linear.conv.conv.out_channels for i in range(1, 21)]
out_channels = [model.blocks[0].mobile_inverted_conv.point_linear.conv.out_channels] + out_channels

# accumulate the weight norm of every (kernel, expand) choice per block;
# note: despite the variable names, these are L1 norms (p=1)
l2_squared = np.zeros([20, 3, 3], dtype=np.float64)  # np.float is removed in NumPy >= 1.24
for i in range(20):
    for k in [3, 5, 7]:
        for e in [3, 4, 6]:
            l2 = 0.0
            module = model.blocks[i + 1].mobile_inverted_conv
            in_channel = out_channels[i]
            mid_channels = make_divisible(round(in_channel * e), 8)
            if module.inverted_bottleneck is not None:
                l2 += torch.norm(module.inverted_bottleneck.conv.conv.weight[:mid_channels, :in_channel, :, :], p=1)
                l2 += torch.norm(module.inverted_bottleneck.bn.bn.weight[:mid_channels], p=1)
                if module.inverted_bottleneck.bn.bn.bias is not None:
                    l2 += torch.norm(module.inverted_bottleneck.bn.bn.bias[:mid_channels], p=1)
            l2 += torch.norm(module.depth_conv.conv.get_active_filter(mid_channels, k), p=1)
            l2 += torch.norm(module.depth_conv.bn.bn.weight[:mid_channels], p=1)
            if module.depth_conv.bn.bn.bias is not None:
                l2 += torch.norm(module.depth_conv.bn.bn.bias[:mid_channels], p=1)
            if hasattr(module.depth_conv, 'se'):
                se_channel = make_divisible(mid_channels // module.depth_conv.se.reduction, divisor=8)
                l2 += torch.norm(module.depth_conv.se.fc.reduce.weight[:se_channel, :mid_channels, :, :], p=1)
                if module.depth_conv.se.fc.reduce.bias is not None:
                    l2 += torch.norm(module.depth_conv.se.fc.reduce.bias[:se_channel], p=1)
def __init__(self, in_channels, out_channels,
             kernel_size=3, stride=1, expand_ratio=0.25, mid_channels=None,
             act_func='relu', groups=1, downsample_mode='avgpool_conv'):
    super(ResNetBottleneckBlock, self).__init__()

    self.in_channels = in_channels
    self.out_channels = out_channels

    self.kernel_size = kernel_size
    self.stride = stride
    self.expand_ratio = expand_ratio
    self.mid_channels = mid_channels
    self.act_func = act_func
    self.groups = groups

    self.downsample_mode = downsample_mode

    if self.mid_channels is None:
        feature_dim = round(self.out_channels * self.expand_ratio)
    else:
        feature_dim = self.mid_channels

    feature_dim = make_divisible(feature_dim, MyNetwork.CHANNEL_DIVISIBLE)
    self.mid_channels = feature_dim

    # build modules
    self.conv1 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv2d(self.in_channels, feature_dim, 1, 1, 0, bias=False)),
        ('bn', nn.BatchNorm2d(feature_dim)),
        ('act', build_activation(self.act_func, inplace=True)),
    ]))

    pad = get_same_padding(self.kernel_size)
    self.conv2 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv2d(feature_dim, feature_dim, kernel_size, stride, pad, groups=groups, bias=False)),
        ('bn', nn.BatchNorm2d(feature_dim)),
        ('act', build_activation(self.act_func, inplace=True))
    ]))

    self.conv3 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv2d(feature_dim, self.out_channels, 1, 1, 0, bias=False)),
        ('bn', nn.BatchNorm2d(self.out_channels)),
    ]))

    if stride == 1 and in_channels == out_channels:
        self.downsample = IdentityLayer(in_channels, out_channels)
    elif self.downsample_mode == 'conv':
        self.downsample = nn.Sequential(OrderedDict([
            ('conv', nn.Conv2d(in_channels, out_channels, 1, stride, 0, bias=False)),
            ('bn', nn.BatchNorm2d(out_channels)),
        ]))
    elif self.downsample_mode == 'avgpool_conv':
        self.downsample = nn.Sequential(OrderedDict([
            ('avg_pool', nn.AvgPool2d(kernel_size=stride, stride=stride, padding=0, ceil_mode=True)),
            ('conv', nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False)),
            ('bn', nn.BatchNorm2d(out_channels)),
        ]))
    else:
        raise NotImplementedError

    self.final_act = build_activation(self.act_func, inplace=True)
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-3), dropout_rate=0.1, base_stage_width=None,
             width_mult=1.0, ks_list=3, expand_ratio_list=6, depth_list=4):
    self.width_mult = width_mult
    self.ks_list = val2list(ks_list, 1)
    self.expand_ratio_list = val2list(expand_ratio_list, 1)
    self.depth_list = val2list(depth_list, 1)

    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    if base_stage_width == 'google':
        # MobileNetV2 Stage Width
        base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
    else:
        # ProxylessNAS Stage Width
        base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

    input_channel = make_divisible(base_stage_width[0] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
    first_block_width = make_divisible(base_stage_width[1] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
    last_channel = make_divisible(base_stage_width[-1] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)

    # first conv layer
    first_conv = ConvLayer(
        3, input_channel, kernel_size=3, stride=2, use_bn=True, act_func='relu6', ops_order='weight_bn_act'
    )
    # first block
    first_block_conv = MBConvLayer(
        in_channels=input_channel, out_channels=first_block_width,
        kernel_size=3, stride=1, expand_ratio=1, act_func='relu6',
    )
    first_block = ResidualBlock(first_block_conv, None)

    input_channel = first_block_width
    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1

    stride_stages = [2, 2, 2, 1, 2, 1]
    n_block_list = [max(self.depth_list)] * 5 + [1]

    width_list = []
    for base_width in base_stage_width[2:-1]:
        width = make_divisible(base_width * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
        width_list.append(width)

    for width, n_block, s in zip(width_list, n_block_list, stride_stages):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1

            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=val2list(input_channel, 1),
                out_channel_list=val2list(output_channel, 1),
                kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func='relu6',
            )

            if stride == 1 and input_channel == output_channel:
                shortcut = IdentityLayer(input_channel, input_channel)
            else:
                shortcut = None

            mb_inverted_block = ResidualBlock(mobile_inverted_conv, shortcut)

            blocks.append(mb_inverted_block)
            input_channel = output_channel
    # 1x1_conv before global average pooling
    feature_mix_layer = ConvLayer(
        input_channel, last_channel, kernel_size=1, use_bn=True, act_func='relu6',
    )
    classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

    super(OFAProxylessNASNets, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [len(block_idx) for block_idx in self.block_group_info]
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0.1, base_stage_width=None,
             width_mult=1.0, ks_list=3, expand_ratio_list=6, depth_list=4):
    self.width_mult = width_mult
    self.ks_list = val2list(ks_list, 1)
    self.expand_ratio_list = val2list(expand_ratio_list, 1)
    self.depth_list = val2list(depth_list, 1)

    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    base_stage_width = [16, 16, 24, 40, 80, 112, 160, 960, 1280]

    final_expand_width = make_divisible(base_stage_width[-2] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
    last_channel = make_divisible(base_stage_width[-1] * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)

    stride_stages = [1, 2, 2, 2, 1, 2]
    act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
    se_stages = [False, False, True, False, True, True]
    n_block_list = [1] + [max(self.depth_list)] * 5
    width_list = []
    for base_width in base_stage_width[:-2]:
        width = make_divisible(base_width * self.width_mult, MyNetwork.CHANNEL_DIVISIBLE)
        width_list.append(width)

    input_channel, first_block_dim = width_list[0], width_list[1]
    # first conv layer
    first_conv = ConvLayer(3, input_channel, kernel_size=3, stride=2, act_func='h_swish')
    first_block_conv = MBConvLayer(
        in_channels=input_channel, out_channels=first_block_dim, kernel_size=3, stride=stride_stages[0],
        expand_ratio=1, act_func=act_stages[0], use_se=se_stages[0],
    )
    first_block = ResidualBlock(
        first_block_conv,
        IdentityLayer(first_block_dim, first_block_dim) if input_channel == first_block_dim else None,
    )

    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1
    feature_dim = first_block_dim

    for width, n_block, s, act_func, use_se in zip(width_list[2:], n_block_list[1:],
                                                   stride_stages[1:], act_stages[1:], se_stages[1:]):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=val2list(feature_dim),
                out_channel_list=val2list(output_channel),
                kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func=act_func,
                use_se=use_se,
            )
            if stride == 1 and feature_dim == output_channel:
                shortcut = IdentityLayer(feature_dim, feature_dim)
            else:
                shortcut = None
            blocks.append(ResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel
    # final expand layer, feature mix layer & classifier
    final_expand_layer = ConvLayer(feature_dim, final_expand_width, kernel_size=1, act_func='h_swish')
    feature_mix_layer = ConvLayer(
        final_expand_width, last_channel, kernel_size=1, bias=False, use_bn=False, act_func='h_swish',
    )
    classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

    super(OFAMobileNetV3, self).__init__(first_conv, blocks, final_expand_layer, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [len(block_idx) for block_idx in self.block_group_info]
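# Hypothetical sampling/deployment sketch for the supernet above
# (set_active_subnet/get_active_subnet belong to the OFA supernet API
# but are not shown in this excerpt):
ofa_net = OFAMobileNetV3(ks_list=[3, 5, 7], expand_ratio_list=[3, 4, 6], depth_list=[2, 3, 4])
ofa_net.set_active_subnet(ks=7, e=6, d=4)                 # activate the largest subnet
subnet = ofa_net.get_active_subnet(preserve_weight=True)  # extract it as a static network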
def active_middle_channel(self, in_channel):
    return make_divisible(round(in_channel * self.active_expand_ratio), MyNetwork.CHANNEL_DIVISIBLE)
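# `make_divisible` is used throughout this section but not defined in the
# excerpt; it presumably follows the standard MobileNet channel-rounding
# rule. A minimal sketch under that assumption:
def make_divisible(v, divisor, min_val=None):
    # round v to the nearest multiple of `divisor`, never below 90% of v
    if min_val is None:
        min_val = divisor
    new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

# e.g. with CHANNEL_DIVISIBLE == 8: make_divisible(round(64 * 0.25), 8) -> 16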
def __init__(self, n_classes=1000, width_mult=1.0, bn_param=(0.1, 1e-5), dropout_rate=0.2,
             ks=None, expand_ratio=None, depth_param=None, stage_width_list=None):
    input_channel = 16
    last_channel = 1280

    input_channel = make_divisible(input_channel * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
    last_channel = make_divisible(last_channel * width_mult, MyNetwork.CHANNEL_DIVISIBLE) \
        if width_mult > 1.0 else last_channel

    cfg = {
        # k, exp, c, se, nl, s, e
        '0': [
            [3, 16, 16, False, 'relu', 1, 1],
        ],
        '1': [
            [3, 64, 24, False, 'relu', 2, None],   # 4
            [3, 72, 24, False, 'relu', 1, None],   # 3
        ],
        '2': [
            [5, 72, 40, True, 'relu', 2, None],    # 3
            [5, 120, 40, True, 'relu', 1, None],   # 3
            [5, 120, 40, True, 'relu', 1, None],   # 3
        ],
        '3': [
            [3, 240, 80, False, 'h_swish', 2, None],   # 6
            [3, 200, 80, False, 'h_swish', 1, None],   # 2.5
            [3, 184, 80, False, 'h_swish', 1, None],   # 2.3
            [3, 184, 80, False, 'h_swish', 1, None],   # 2.3
        ],
        '4': [
            [3, 480, 112, True, 'h_swish', 1, None],   # 6
            [3, 672, 112, True, 'h_swish', 1, None],   # 6
        ],
        '5': [
            [5, 672, 160, True, 'h_swish', 2, None],   # 6
            [5, 960, 160, True, 'h_swish', 1, None],   # 6
            [5, 960, 160, True, 'h_swish', 1, None],   # 6
        ],
    }

    cfg = self.adjust_cfg(cfg, ks, expand_ratio, depth_param, stage_width_list)
    # apply the width multiplier to the mobile setting: scale `exp` (index 1)
    # and `c` (index 2) of each block config
    for stage_id, block_config_list in cfg.items():
        for block_config in block_config_list:
            if block_config[1] is not None:
                block_config[1] = make_divisible(block_config[1] * width_mult, MyNetwork.CHANNEL_DIVISIBLE)
            block_config[2] = make_divisible(block_config[2] * width_mult, MyNetwork.CHANNEL_DIVISIBLE)

    first_conv, blocks, final_expand_layer, feature_mix_layer, classifier = self.build_net_via_cfg(
        cfg, input_channel, last_channel, n_classes, dropout_rate)
    super(MobileNetV3Large, self).__init__(first_conv, blocks, final_expand_layer, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(*bn_param)
def active_middle_channels(self):
    feature_dim = round(self.active_out_channel * self.active_expand_ratio)
    feature_dim = make_divisible(feature_dim, MyNetwork.CHANNEL_DIVISIBLE)
    return feature_dim