def get_active_subnet(self, in_channel, preserve_weight=True):
    # build the new layer
    sub_layer = set_layer_from_config(
        self.get_active_subnet_config(in_channel))
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer

    # copy weight from current layer
    sub_layer.conv1.conv.weight.data.copy_(
        self.conv1.conv.get_active_filter(
            self.active_middle_channels, in_channel).data)
    copy_bn(sub_layer.conv1.bn, self.conv1.bn.bn)

    sub_layer.conv2.conv.weight.data.copy_(
        self.conv2.conv.get_active_filter(
            self.active_middle_channels, self.active_middle_channels).data)
    copy_bn(sub_layer.conv2.bn, self.conv2.bn.bn)

    sub_layer.conv3.conv.weight.data.copy_(
        self.conv3.conv.get_active_filter(
            self.active_out_channel, self.active_middle_channels).data)
    copy_bn(sub_layer.conv3.bn, self.conv3.bn.bn)

    if not isinstance(self.downsample, IdentityLayer):
        sub_layer.downsample.conv.weight.data.copy_(
            self.downsample.conv.get_active_filter(
                self.active_out_channel, in_channel).data)
        copy_bn(sub_layer.downsample.bn, self.downsample.bn.bn)

    return sub_layer

def get_active_subnet(self, in_channel, preserve_weight=True):
    sub_layer = set_layer_from_config(self.get_active_subnet_config(in_channel))
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer

    sub_layer.conv.weight.data.copy_(
        self.conv.get_active_filter(self.active_out_channel, in_channel).data)
    if self.use_bn:
        copy_bn(sub_layer.bn, self.bn.bn)
    return sub_layer

def get_active_subnet(self, in_features, preserve_weight=True):
    sub_layer = LinearLayer(in_features, self.out_features, self.bias,
                            dropout_rate=self.dropout_rate)
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer

    sub_layer.linear.weight.data.copy_(
        self.linear.get_active_weight(self.out_features, in_features).data)
    if self.bias:
        sub_layer.linear.bias.data.copy_(
            self.linear.get_active_bias(self.out_features).data)
    return sub_layer

def get_active_subnet(self, in_channel, preserve_weight=True):
    # build the new layer
    sub_layer = set_layer_from_config(
        self.get_active_subnet_config(in_channel))
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer

    middle_channel = self.active_middle_channel(in_channel)
    # copy weight from current layer
    if sub_layer.inverted_bottleneck is not None:
        sub_layer.inverted_bottleneck.conv.weight.data.copy_(
            self.inverted_bottleneck.conv.get_active_filter(
                middle_channel, in_channel).data)
        copy_bn(sub_layer.inverted_bottleneck.bn, self.inverted_bottleneck.bn.bn)

    sub_layer.depth_conv.conv.weight.data.copy_(
        self.depth_conv.conv.get_active_filter(
            middle_channel, self.active_kernel_size).data)
    copy_bn(sub_layer.depth_conv.bn, self.depth_conv.bn.bn)

    if self.use_se:
        se_mid = make_divisible(middle_channel // SEModule.REDUCTION,
                                divisor=MyNetwork.CHANNEL_DIVISIBLE)
        sub_layer.depth_conv.se.fc.reduce.weight.data.copy_(
            self.depth_conv.se.get_active_reduce_weight(se_mid, middle_channel).data)
        sub_layer.depth_conv.se.fc.reduce.bias.data.copy_(
            self.depth_conv.se.get_active_reduce_bias(se_mid).data)

        sub_layer.depth_conv.se.fc.expand.weight.data.copy_(
            self.depth_conv.se.get_active_expand_weight(se_mid, middle_channel).data)
        sub_layer.depth_conv.se.fc.expand.bias.data.copy_(
            self.depth_conv.se.get_active_expand_bias(middle_channel).data)

    sub_layer.point_linear.conv.weight.data.copy_(
        self.point_linear.conv.get_active_filter(
            self.active_out_channel, middle_channel).data)
    copy_bn(sub_layer.point_linear.bn, self.point_linear.bn.bn)

    return sub_layer

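# --- Usage sketch (illustrative only, not part of the library code above) ---
# The get_active_subnet() methods assume the caller has already fixed the layer's
# active dimensions (active_kernel_size, active_out_channel, ...), which the code
# above only reads. The hypothetical helper below sketches that flow under the
# assumption that these are plain, settable attributes; the chosen values are
# placeholders and must come from the layer's supported candidate lists.
def _example_extract_static_layer(dynamic_layer, in_channel):
    # Pick one concrete configuration for the dynamic layer (assumed settable attributes).
    dynamic_layer.active_kernel_size = 5      # assumed to be one of the supported kernel sizes
    dynamic_layer.active_out_channel = 160    # assumed to be <= the layer's max out channels
    # Materialize a static nn.Module whose weights are copied from the super-network.
    return dynamic_layer.get_active_subnet(in_channel, preserve_weight=True)
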
def set_running_statistics(model, data_loader, distributed=False):
    bn_mean = {}
    bn_var = {}

    forward_model = copy.deepcopy(model)
    for name, m in forward_model.named_modules():
        if isinstance(m, nn.BatchNorm2d):
            if distributed:
                bn_mean[name] = DistributedTensor(name + '#mean')
                bn_var[name] = DistributedTensor(name + '#var')
            else:
                bn_mean[name] = AverageMeter()
                bn_var[name] = AverageMeter()

            def new_forward(bn, mean_est, var_est):
                def lambda_forward(x):
                    batch_mean = x.mean(0, keepdim=True).mean(
                        2, keepdim=True).mean(3, keepdim=True)  # 1, C, 1, 1
                    batch_var = (x - batch_mean) * (x - batch_mean)
                    batch_var = batch_var.mean(0, keepdim=True).mean(
                        2, keepdim=True).mean(3, keepdim=True)

                    batch_mean = torch.squeeze(batch_mean)
                    batch_var = torch.squeeze(batch_var)

                    # accumulate per-batch statistics into the running estimators
                    mean_est.update(batch_mean.data, x.size(0))
                    var_est.update(batch_var.data, x.size(0))

                    # bn forward using calculated mean & var
                    _feature_dim = batch_mean.size(0)
                    return F.batch_norm(
                        x, batch_mean, batch_var,
                        bn.weight[:_feature_dim], bn.bias[:_feature_dim],
                        False, 0.0, bn.eps,
                    )
                return lambda_forward

            m.forward = new_forward(m, bn_mean[name], bn_var[name])

    if len(bn_mean) == 0:
        # skip if there are no batch normalization layers in the network
        return

    with torch.no_grad():
        DynamicBatchNorm2d.SET_RUNNING_STATISTICS = True
        for images, labels in data_loader:
            images = images.to(get_net_device(forward_model))
            forward_model(images)
        DynamicBatchNorm2d.SET_RUNNING_STATISTICS = False

    # write the estimated statistics back into the original model's BN buffers
    for name, m in model.named_modules():
        if name in bn_mean and bn_mean[name].count > 0:
            feature_dim = bn_mean[name].avg.size(0)
            assert isinstance(m, nn.BatchNorm2d)
            m.running_mean.data[:feature_dim].copy_(bn_mean[name].avg)
            m.running_var.data[:feature_dim].copy_(bn_var[name].avg)
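

# --- Usage sketch (illustrative only, not part of the library code above) ---
# set_running_statistics() re-estimates BatchNorm running_mean / running_var for the
# currently active sub-network, since the statistics stored in the super-network do
# not match any single child. The hypothetical helper below shows a typical
# calibration pass; `calib_loader` stands for a small loader of (images, labels)
# batches drawn from the training distribution.
def _example_calibrate_bn(subnet, calib_loader):
    # Recompute BN statistics on a few calibration batches before evaluation.
    set_running_statistics(subnet, calib_loader)
    subnet.eval()
    return subnet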