def test_normalize_module_name():
    """Exercise normalize_module_name/denormalize_module_name.

    First checks a handful of literal names, then compares the module names
    of serial and parallel builds of three architectures.
    """

    def name_test(dataset, arch):
        """Check name mapping between a serial and a parallel build of `arch`."""
        serial = create_model(False, dataset, arch, parallel=False)
        parallel = create_model(False, dataset, arch, parallel=True)
        assert serial is not None and parallel is not None

        serial_names = [entry[0] for entry in serial.named_modules()]
        parallel_names = [entry[0] for entry in parallel.named_modules()]
        assert serial_names is not None and parallel_names is not None
        # The parallel build is expected to expose exactly one extra module
        # name (presumably the wrapper itself -- verify against create_model).
        assert len(serial_names) + 1 == len(parallel_names)

        # Skip the first serial name and the first two parallel names, then
        # walk both lists in lockstep.
        for plain, wrapped in zip(serial_names[1:], parallel_names[2:]):
            assert plain == normalize_module_name(wrapped)
            logging.debug("{} {} {}".format(
                wrapped, plain, normalize_module_name(wrapped)))
            assert wrapped == denormalize_module_name(parallel, plain)

    literal_cases = (
        ("features.module.0", "features.0"),
        ("module.features.0", "features.0"),
        ("features.module", "features"),
        ('module', ''),
        ('no.parallel.modules', 'no.parallel.modules'),
    )
    for raw_name, expected in literal_cases:
        assert normalize_module_name(raw_name) == expected

    model_cases = (
        ('imagenet', 'vgg19'),
        ('cifar10', 'resnet20_cifar'),
        ('imagenet', 'alexnet'),
    )
    for dataset, arch in model_cases:
        name_test(dataset, arch)
def test_normalize_module_name():
    """Check normalize_module_name on literal names, then run name_test on three models."""
    literal_cases = (
        ("features.0", "features.module.0"),
        ("features.0", "module.features.0"),
        ("features", "features.module"),
    )
    for expected, raw_name in literal_cases:
        assert expected == normalize_module_name(raw_name)

    model_cases = (
        ('imagenet', 'vgg19'),
        ('cifar10', 'resnet20_cifar'),
        ('imagenet', 'alexnet'),
    )
    for dataset, arch in model_cases:
        name_test(dataset, arch)
def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
    """Build a thinning recipe that removes all-zero channels from Conv layers.

    Every 4D parameter of `model` is inspected.  When fewer non-zero channels
    are found than the tensor has along dimension 1, directives are recorded
    to shrink the layer's `in_channels`, the parameter itself, the
    `out_channels`/weights of the Conv predecessors reported by `sgraph`, and
    the single BatchNormalization predecessor, if there is one.

    Args:
        sgraph: graph object queried through `predecessors_f`.
        model: the model whose modules and parameters are examined.
        zeros_mask_dict: not used by this function.

    Returns:
        The populated ThinningRecipe.
    """
    msglogger.info("Invoking create_thinning_recipe_channels")

    recipe = ThinningRecipe(modules={}, parameters={})
    modules_by_name = dict(model.named_modules())

    for param_name, param in model.named_parameters():
        # Only 4D tensors (convolution weights) are candidates.
        if param.dim() != 4:
            continue

        total_channels = param.size(1)
        nnz_channels = find_nonzero_channels(param, param_name)
        nnz_count = len(nnz_channels)

        # Nothing to remove when every channel is non-zero.
        if total_channels <= nnz_count:
            continue

        # Channels are being removed, so the layer consumes fewer IFMs.
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(modules_by_name[layer_name], torch.nn.modules.Conv2d)
        append_module_directive(recipe, layer_name, key='in_channels', val=nnz_count)

        # Keep only the non-zero channels of this weights tensor.
        indices = nnz_channels.data.squeeze()
        append_param_directive(recipe, param_name, (1, indices))

        normalized_name = normalize_module_name(layer_name)

        # Conv layers feeding this one, converted back to the model's own
        # naming scheme (relevant when DataParallel is used).
        conv_predecessors = [
            denormalize_module_name(model, name)
            for name in sgraph.predecessors_f(normalized_name, ['Conv'])
        ]
        for conv_name in conv_predecessors:
            # Each preceding convolution produces fewer output channels...
            append_module_directive(recipe, conv_name, key='out_channels', val=nnz_count)
            # ...and loses the matching filters from its weights tensor.
            append_param_directive(recipe, conv_name + '.weight', (0, indices))

        # A BatchNormalization predecessor must be thinned in the same way.
        bn_layers = sgraph.predecessors_f(normalized_name, ['BatchNormalization'])
        if bn_layers:
            assert len(bn_layers) == 1
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(recipe, modules_by_name, bn_layer_name,
                        len_thin_features=nnz_count, thin_features=indices)

    return recipe
def get_normalized_recipe(recipe):
    """Return a new ThinningRecipe whose keys went through normalize_module_name.

    Both `recipe.modules` and `recipe.parameters` are re-keyed; the values are
    reused as they are.
    """

    def _with_normalized_keys(directives):
        return {
            distiller.normalize_module_name(name): directive
            for name, directive in directives.items()
        }

    normalized_modules = _with_normalized_keys(recipe.modules)
    normalized_parameters = _with_normalized_keys(recipe.parameters)
    return ThinningRecipe(modules=normalized_modules,
                          parameters=normalized_parameters)
def test_simplenet():
    """Check the number of Conv predecessors of conv1 and conv2 in simplenet_cifar."""
    graph = create_graph('cifar10', 'simplenet_cifar')
    assert graph is not None

    def conv_predecessors(layer_name):
        # Look up 'Conv' predecessors using the normalized layer name.
        return graph.predecessors_f(normalize_module_name(layer_name), 'Conv')

    conv1_preds = conv_predecessors('module.conv1')
    logging.debug("[simplenet_cifar]: preds of module.conv1 = {}".format(conv1_preds))
    assert len(conv1_preds) == 0

    conv2_preds = conv_predecessors('module.conv2')
    logging.debug("[simplenet_cifar]: preds of module.conv2 = {}".format(conv2_preds))
    assert len(conv2_preds) == 1
def get_normalized_recipe(recipe):
    """Return a new ThinningRecipe whose keys went through normalize_module_name.

    Both `recipe.modules` and `recipe.parameters` are re-keyed; the values are
    reused as they are.
    """

    def _with_normalized_keys(directives):
        return {
            normalize_module_name(name): directive
            for name, directive in directives.items()
        }

    return ThinningRecipe(
        modules=_with_normalized_keys(recipe.modules),
        parameters=_with_normalized_keys(recipe.parameters),
    )
def name_test(dataset, arch):
    """Check name mapping between a serial and a parallel build of `arch`.

    Builds the model twice (parallel=False / parallel=True) and asserts that
    normalize_module_name and denormalize_module_name translate between the
    two sets of module names.
    """
    serial = create_model(False, dataset, arch, parallel=False)
    parallel = create_model(False, dataset, arch, parallel=True)
    assert serial is not None and parallel is not None

    serial_names = [entry[0] for entry in serial.named_modules()]
    parallel_names = [entry[0] for entry in parallel.named_modules()]
    assert serial_names is not None and parallel_names is not None
    # The parallel build is expected to expose exactly one extra module name
    # (presumably the wrapper itself -- verify against create_model).
    assert len(serial_names) + 1 == len(parallel_names)

    # Skip the first serial name and the first two parallel names, then walk
    # both lists in lockstep.
    for plain, wrapped in zip(serial_names[1:], parallel_names[2:]):
        assert plain == normalize_module_name(wrapped)
        logging.debug("{} {} {}".format(wrapped, plain, normalize_module_name(wrapped)))
        assert wrapped == denormalize_module_name(parallel, plain)
def log_model_buffers(self, model, buffer_names, tag_prefix, epoch, completed, total, freq):
    """Log the values of named module buffers as text tables.

    For every name in `buffer_names`, one table is emitted through
    `msglogger.info`, with one row per module that has an attribute of that
    name and one column per value.  Names for which no module has the
    attribute produce no output.

    `tag_prefix`, `total` and `freq` are accepted but not used here.
    """
    rows_by_buffer = {name: [] for name in buffer_names}
    widest_by_buffer = {name: 0 for name in buffer_names}

    for module_name, module in model.named_modules():
        for buffer_name in buffer_names:
            try:
                buf = getattr(module, buffer_name)
            except AttributeError:
                # This module does not carry the buffer.
                continue

            if isinstance(buf, (list, torch.nn.ParameterList)):
                values = buf
            else:
                values = buf.view(-1).tolist()

            row = [distiller.normalize_module_name(module_name) + '.' + buffer_name]
            row.extend(values)
            rows_by_buffer[buffer_name].append(row)
            widest_by_buffer[buffer_name] = max(widest_by_buffer[buffer_name], len(values))

    for name in buffer_names:
        rows = rows_by_buffer[name]
        if not rows:
            continue
        value_headers = ['Val_' + str(col) for col in range(widest_by_buffer[name])]
        table = tabulate.tabulate(rows, headers=['Layer'] + value_headers,
                                  tablefmt='psql', floatfmt='.4f')
        msglogger.info('\n' + name.upper() + ': (Epoch {0}, Step {1})\n'.format(epoch, completed) + table)
def log_model_buffers(self, model, buffer_names, tag_prefix, epoch, completed, total, freq):
    """Append the values of named module buffers to per-buffer CSV files.

    Notes:
        1. Each name in `buffer_names` is logged to its own CSV file, whose
           path comes from `self.get_fname(name)`.
        2. Files are opened in append mode; a header row is written only when
           the file did not exist before this call.
        3. In each call, one row is appended for every module that has an
           attribute with the buffer's name.

    `tag_prefix` and `freq` are accepted but not used here.
    """
    with ExitStack() as stack:
        writers = {}
        for buf_name in buffer_names:
            fname = self.get_fname(buf_name)
            is_new_file = not os.path.isfile(fname)
            # newline='' is what the csv module requires for files handed to
            # csv.writer; without it the writer's '\r\n' terminator can be
            # translated again on platforms that rewrite '\n'.
            csv_file = stack.enter_context(open(fname, 'a', newline=''))
            writer = csv.writer(csv_file)
            if is_new_file:
                writer.writerow(['Layer', 'Epoch', 'Step', 'Total', 'Values'])
            writers[buf_name] = writer

        for module_name, module in model.named_modules():
            for buffer_name in buffer_names:
                try:
                    buf = getattr(module, buffer_name)
                except AttributeError:
                    # This module does not carry the buffer.
                    continue

                if isinstance(buf, (list, torch.nn.ParameterList)):
                    # Flatten every element into one list of values.
                    values = []
                    for element in buf:
                        values += element.view(-1).tolist()
                else:
                    values = buf.view(-1).tolist()

                row_label = distiller.normalize_module_name(module_name) + '.' + buffer_name
                writers[buffer_name].writerow(
                    [row_label, epoch, completed, int(total)] + values)
def collect_conv_details(model, dataset):
    """Gather per-layer details for every Conv2d module of `model`.

    A SummaryGraph is built from the model and a dummy input (both moved to
    CUDA) and queried for each convolution's weights volume, MACs and feature
    map shapes.  MACs are scaled by `distiller.density_ch` of the weights.

    Returns:
        (conv_layers, total_macs, total_nnz), where conv_layers is an
        OrderedDict mapping a running index to a SimpleNamespace per layer.
    """
    graph = SummaryGraph(model.cuda(), get_dummy_input(dataset).cuda())

    conv_layers = OrderedDict()
    total_macs = 0
    total_nnz = 0
    for module_idx, (name, module) in enumerate(model.named_modules()):
        if not isinstance(module, torch.nn.Conv2d):
            continue

        conv = SimpleNamespace()
        conv.t = len(conv_layers)
        conv.k = module.kernel_size[0]
        conv.stride = module.stride

        # The remaining details come from the SummaryGraph.
        conv_op = graph.find_op(normalize_module_name(name))
        assert conv_op is not None
        op_attrs = conv_op['attrs']

        conv.weights_vol = op_attrs['weights_vol']
        total_nnz += conv.weights_vol

        conv.macs = op_attrs['MACs']
        weights = distiller.model_find_param(model, name + ".weight")
        conv.macs *= distiller.density_ch(weights)
        total_macs += conv.macs

        ofm_shape = graph.param_shape(conv_op['outputs'][0])
        conv.ofm_h = ofm_shape[2]
        conv.ofm_w = ofm_shape[3]
        ifm_shape = graph.param_shape(conv_op['inputs'][0])
        conv.ifm_h = ifm_shape[2]
        conv.ifm_w = ifm_shape[3]

        conv.name = name
        conv.id = module_idx
        conv_layers[len(conv_layers)] = conv

    return conv_layers, total_macs, total_nnz
def collect_conv_details(model, dataset):
    """Gather per-layer details for every Conv2d module of `model`.

    A random dummy input is created for 'imagenet' (1x3x224x224) or 'cifar10'
    (1x3x32x32); a SummaryGraph built from the model and that input (both
    moved to CUDA) is then queried for each convolution's MACs and feature
    map shapes.

    Returns:
        (conv_layers, total_macs), where conv_layers is an OrderedDict mapping
        a running index to a SimpleNamespace per layer.

    Raises:
        ValueError: if `dataset` is neither 'imagenet' nor 'cifar10'.
    """
    if dataset == 'imagenet':
        input_shape = (1, 3, 224, 224)
    elif dataset == 'cifar10':
        input_shape = (1, 3, 32, 32)
    else:
        raise ValueError("dataset %s is not supported" % dataset)
    dummy_input = torch.randn(*input_shape)

    graph = SummaryGraph(model.cuda(), dummy_input.cuda())

    conv_layers = OrderedDict()
    total_macs = 0
    for module_idx, (name, module) in enumerate(model.named_modules()):
        if not isinstance(module, torch.nn.Conv2d):
            continue

        conv = SimpleNamespace()
        conv.t = len(conv_layers)
        conv.k = module.kernel_size[0]
        conv.stride = module.stride

        # The remaining details come from the SummaryGraph.
        conv_op = graph.find_op(normalize_module_name(name))
        assert conv_op is not None

        conv.macs = conv_op['attrs']['MACs']
        total_macs += conv.macs

        ofm_shape = graph.param_shape(conv_op['outputs'][0])
        conv.ofm_h = ofm_shape[2]
        conv.ofm_w = ofm_shape[3]
        ifm_shape = graph.param_shape(conv_op['inputs'][0])
        conv.ifm_h = ifm_shape[2]
        conv.ifm_w = ifm_shape[3]

        conv.name = name
        conv.id = module_idx
        conv_layers[len(conv_layers)] = conv

    return conv_layers, total_macs
def successors_f(self, node_name, successors_types, done_list=None, logging=None, denorm_names=True):
    """Return the successors of `node_name` whose type is in `successors_types`.

    The graph is walked forward along `self.edges`, starting at `node_name`,
    which is looked up first as an op and then as a param.  The walk stops
    along a path as soon as an op of a requested type is reached (other than
    the starting node).  An empty list is returned when nothing matches or
    when `node_name` cannot be found.

    Args:
        node_name: name of the starting node; it is normalized first.
        successors_types: a type name or a list of type names.
        done_list: names already visited; shared across the recursion.
        logging: only forwarded to recursive calls.
        denorm_names: when true, matches are returned denormalized against
            `self._src_model`; otherwise the normalized name is returned.
    """
    node_name = distiller.normalize_module_name(node_name)

    node = self.find_op(node_name)
    node_is_an_op = node is not None
    if not node_is_an_op:
        node = self.find_param(node_name)
        if node is None:
            msglogger.warning(
                "successors_f: Could not find node {}".format(node_name))
            return []

    if done_list is None:
        done_list = []
    done_list.append(node_name)

    if not isinstance(successors_types, list):
        successors_types = [successors_types]

    # A match only counts when it is not the node the search started from.
    if node_is_an_op and node['type'] in successors_types and len(done_list) > 1:
        if denorm_names:
            return [distiller.denormalize_module_name(self._src_model, node_name)]
        return [node_name]

    # Op nodes and data nodes are expanded the same way: follow every
    # outgoing edge whose destination has not been visited yet.  The list is
    # materialized before recursing because the recursion grows done_list.
    pending = [
        edge.dst for edge in self.edges
        if edge.src == node_name and edge.dst not in done_list
    ]

    matches = []
    for successor in pending:
        matches += self.successors_f(successor, successors_types, done_list,
                                     logging, denorm_names)
    return matches
def named_params_layers(self):
    """Yield (layer_name, param_name, param) for each parameter of the source model.

    The layer name is the parameter name without its last dotted component,
    normalized and then denormalized against `self._src_model`.
    """
    for param_name, param in self._src_model.named_parameters():
        # Everything before the final '.' names the owning layer.
        layer_part, _, _ = param_name.rpartition('.')
        normalized_layer_name = distiller.normalize_module_name(layer_part)
        sgraph_layer_name = distiller.denormalize_module_name(
            self._src_model, normalized_layer_name)
        yield sgraph_layer_name, param_name, param
def predecessors_f(self, node_name, predecessors_types, done_list=None, logging=None):
    """Return the predecessors of `node_name` whose type is in `predecessors_types`.

    The graph is walked backward along `self.edges`, starting at `node_name`,
    which is looked up first as an op and then as a param.  The walk stops
    along a path as soon as an op of a requested type is reached (other than
    the starting node).  An empty list is returned when nothing matches or
    when `node_name` cannot be found.

    Args:
        node_name: name of the starting node; it is normalized first.
        predecessors_types: a type name or a list of type names.
        done_list: names already visited; shared across the recursion.
        logging: only forwarded to recursive calls.

    Returns:
        Names denormalized against `self._src_model`.
    """
    node_name = distiller.normalize_module_name(node_name)

    node = self.find_op(node_name)
    node_is_an_op = node is not None
    if not node_is_an_op:
        node = self.find_param(node_name)
        if node is None:
            msglogger.warning(
                "predecessors_f: Could not find node {}".format(node_name))
            return []

    if done_list is None:
        done_list = []
    done_list.append(node_name)

    if not isinstance(predecessors_types, list):
        predecessors_types = [predecessors_types]

    # A match only counts when it is not the node the search started from.
    if node_is_an_op and node['type'] in predecessors_types and len(done_list) > 1:
        return [distiller.denormalize_module_name(self._src_model, node_name)]

    # Op nodes and data nodes are expanded the same way: follow every
    # incoming edge whose source has not been visited yet.  The list is
    # materialized before recursing because the recursion grows done_list.
    pending = [
        edge.src for edge in self.edges
        if edge.dst == node_name and edge.src not in done_list
    ]

    matches = []
    for predecessor in pending:
        matches += self.predecessors_f(predecessor, predecessors_types,
                                       done_list, logging)

    # Results of the recursion are passed through denormalization once more.
    return [
        distiller.denormalize_module_name(self._src_model, name)
        for name in matches
    ]
def get_model_compute_budget(model, dataset, layers_to_prune=None):
    """Return the summed MACs of all Conv2d layers in `model`.

    A SummaryGraph is built from the model and a dummy input for `dataset`;
    the 'MACs' attribute of each convolution's op is accumulated.
    `layers_to_prune` is accepted but not used here.
    """
    graph = SummaryGraph(model, distiller.get_dummy_input(dataset))

    conv_names = [
        name for name, module in model.named_modules()
        if isinstance(module, torch.nn.Conv2d)
    ]

    budget = 0
    for name in conv_names:
        # The MACs figure comes from the SummaryGraph op of this layer.
        conv_op = graph.find_op(normalize_module_name(name))
        assert conv_op is not None
        budget += conv_op['attrs']['MACs']

    del graph
    return budget
def make_fc(model, fc_module, g, name, seq_id, layer_id):
    """Describe a fully-connected layer as a SimpleNamespace.

    The namespace carries `name`, `id` (layer_id), `t` (seq_id) and the
    'weights_vol', 'MACs', 'n_ofm' and 'n_ifm' attributes of the layer's op in
    the graph `g`.  `fc_module` is accepted but not used here.
    """
    fc = SimpleNamespace(name=name, id=layer_id, t=seq_id)

    # The remaining details come from the SummaryGraph.
    fc_op = g.find_op(normalize_module_name(name))
    assert fc_op is not None
    op_attrs = fc_op['attrs']

    fc.weights_vol = op_attrs['weights_vol']
    fc.macs = op_attrs['MACs']
    fc.n_ofm = op_attrs['n_ofm']
    fc.n_ifm = op_attrs['n_ifm']

    # The weights lookup is still performed, although its result is unused.
    distiller.model_find_param(model, name + ".weight")
    return fc
def collect_conv_details(model, dataset, perform_thinning, layers_to_prune=None):
    """Gather per-layer details for the Conv2d modules of `model`.

    A SummaryGraph built from the model and a dummy input for `dataset` is
    queried for each convolution's weights volume, MACs and feature map
    shapes.  When `perform_thinning` is false, MACs are scaled by
    `distiller.density_3D` of the layer's weights.

    Totals cover every convolution, but only layers named in
    `layers_to_prune` (or all of them when it is None) are returned.

    Returns:
        (conv_layers, total_macs, total_params), where conv_layers is an
        OrderedDict mapping a running index to a SimpleNamespace per layer.
    """
    graph = SummaryGraph(model, distiller.get_dummy_input(dataset))

    conv_layers = OrderedDict()
    total_macs = 0
    total_params = 0
    for module_idx, (name, module) in enumerate(model.named_modules()):
        if not isinstance(module, torch.nn.Conv2d):
            continue

        conv = SimpleNamespace()
        conv.t = len(conv_layers)
        conv.k = module.kernel_size[0]
        conv.stride = module.stride

        # The remaining details come from the SummaryGraph.
        conv_op = graph.find_op(normalize_module_name(name))
        assert conv_op is not None
        op_attrs = conv_op['attrs']

        conv.weights_vol = op_attrs['weights_vol']
        total_params += conv.weights_vol

        conv.macs = op_attrs['MACs']
        weights = distiller.model_find_param(model, name + ".weight")
        if not perform_thinning:
            # Filter-pruning density is applied here (density_3D).
            conv.macs *= distiller.density_3D(weights)
        total_macs += conv.macs

        ofm_shape = graph.param_shape(conv_op['outputs'][0])
        conv.ofm_h = ofm_shape[2]
        conv.ofm_w = ofm_shape[3]
        ifm_shape = graph.param_shape(conv_op['inputs'][0])
        conv.ifm_h = ifm_shape[2]
        conv.ifm_w = ifm_shape[3]

        conv.name = name
        conv.id = module_idx

        keep_layer = layers_to_prune is None or name in layers_to_prune
        if keep_layer:
            conv_layers[len(conv_layers)] = conv

    return conv_layers, total_macs, total_params
def make_conv(model, conv_module, g, name, seq_id, layer_id):
    """Describe a convolution layer as a SimpleNamespace.

    The namespace carries `name`, `id` (layer_id), `t` (seq_id), the first
    kernel dimension `k` and `stride` of `conv_module`, the 'weights_vol',
    'MACs', 'n_ofm' and 'n_ifm' attributes of the layer's op in the graph `g`,
    and the height/width of the op's first output and first input.
    """
    conv = SimpleNamespace(
        name=name,
        id=layer_id,
        t=seq_id,
        k=conv_module.kernel_size[0],
        stride=conv_module.stride,
    )

    # The remaining details come from the SummaryGraph.
    conv_op = g.find_op(normalize_module_name(name))
    assert conv_op is not None
    op_attrs = conv_op['attrs']

    conv.weights_vol = op_attrs['weights_vol']
    conv.macs = op_attrs['MACs']
    conv.n_ofm = op_attrs['n_ofm']
    conv.n_ifm = op_attrs['n_ifm']

    # The weights lookup is still performed, although its result is unused.
    distiller.model_find_param(model, name + ".weight")

    ofm_shape = g.param_shape(conv_op['outputs'][0])
    conv.ofm_h = ofm_shape[2]
    conv.ofm_w = ofm_shape[3]
    ifm_shape = g.param_shape(conv_op['inputs'][0])
    conv.ifm_h = ifm_shape[2]
    conv.ifm_w = ifm_shape[3]
    return conv
def dedicated_module_check(n):
    """Return True when op `n` passes the dedicated-module filter.

    The op passes when its module maps to exactly one op in
    `self.module_ops_map`, or when `dedicated_modules_only` (taken from the
    enclosing scope) is false.
    """
    op_name = distiller.normalize_module_name(n)
    module_name = self.ops[op_name]['module-name']
    ops_in_module = len(self.module_ops_map[module_name])
    return ops_in_module == 1 or not dedicated_modules_only
def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing filters from Convolution layers.

    The 4D weights of the model parameters (i.e. the convolution parameters)
    are examined one by one, to determine which has filters that are all
    zeros.  For each weights tensor that has at least one zero-filter, we
    create a "thinning recipe": meta-instructions describing how the model
    should be changed in order to remove the filters.

    Successor convolutions whose `groups` equals their `in_channels`
    (depthwise convolutions) are thinned on both their input and output
    sides, and the convolutions that follow them are thinned on their input
    side.

    Args:
        sgraph: graph object queried through `successors_f`; Conv op names are
            passed with a '.Conv' suffix and the last dotted component of the
            returned names is stripped.
        model: the model whose modules and parameters are examined.
        zeros_mask_dict: not used by this function.

    Returns:
        The populated ThinningRecipe.

    Raises:
        ValueError: if a 4D parameter has no non-zero filter at all.
    """
    msglogger.info("Invoking create_thinning_recipe_filters")
    msglogger.debug(sgraph.ops.keys())

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = {mod_name: m for mod_name, m in model.named_modules()}

    # Log 2018-09-19 CKH:
    # When a 1x1 conv feeds a depthwise 3x3 conv and both are to be
    # filter-pruned, the depthwise conv's parameter would have to be thinned
    # once because of its predecessor and once more because of its own recipe.
    # A depthwise 3x3 parameter effectively has a single channel dimension
    # (e.g. [32, 1] for a 32x32 depthwise conv), so it cannot be thinned
    # twice: thinning one filter because of the predecessor removes the input
    # and the output at once, since each input is wired to exactly one output.
    # So do not thin the depthwise 3x3 on its own; thinning the 1x1 conv
    # already prunes the 3x3 filters.

    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights
        if param.dim() != 4:
            continue

        # Find the number of zero-valued filters in this weights tensor
        filter_view = param.view(param.size(0), -1)
        num_filters = filter_view.size()[0]
        nonzero_filters = torch.nonzero(filter_view.abs().sum(dim=1))
        num_nnz_filters = nonzero_filters.nelement()
        if num_nnz_filters == 0:
            raise ValueError(
                "Trying to set zero filters for parameter %s is not allowed" % param_name)
        # If every filter in this tensor is non-zero, move on to the next tensor
        if num_filters <= num_nnz_filters:
            msglogger.debug("SKipping {} shape={}".format(
                param_name_2_layer_name(param_name), param.shape))
            continue

        msglogger.info("In tensor %s found %d/%d zero filters", param_name,
                       num_filters - num_nnz_filters, num_filters)

        # We are removing filters, so update the number of outgoing channels
        # (OFMs) in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        # Change the channel count recorded for the architecture
        append_module_directive(model, thinning_recipe, layer_name,
                                key='out_channels', val=num_nnz_filters)

        # Select only the non-zero filters
        indices = nonzero_filters.data.squeeze()
        # Change the size of the parameter tensor
        append_param_directive(thinning_recipe, param_name, (0, indices))

        if layers[layer_name].bias is not None:
            # This convolution has bias coefficients
            append_param_directive(thinning_recipe, layer_name + '.bias', (0, indices))

        # Find all instances of Convolution or FC (GEMM) layers that
        # immediately follow this layer
        msglogger.debug("{} => {}".format(layer_name, normalize_module_name(layer_name)))
        # The op type is appended to the name before calling successors_f
        # (2018-09-19 CKH).
        norm_module_name = normalize_module_name(layer_name)
        # `layers` is keyed by the model's own (possibly DataParallel) names,
        # so the type check must index it with `layer_name`; indexing with the
        # normalized name raises KeyError when the two differ.
        if isinstance(layers[layer_name], torch.nn.modules.Conv2d):
            norm_module_name += '.Conv'
        successors = sgraph.successors_f(norm_module_name, ['Conv', 'Gemm'])
        # Drop the trailing type component from the returned names, then
        # convert them to PyTorch's convoluted naming scheme (when
        # DataParallel is used)
        successors = ['.'.join(succs.split('.')[0:-1]) for succs in successors]
        successors = [
            denormalize_module_name(model, successor)
            for successor in successors
        ]

        for successor in successors:
            if isinstance(layers[successor], torch.nn.modules.Conv2d):
                # If the successor is a depthwise conv, its out_channels must
                # change too (architecture only), and the in_channels of the
                # layers following it must change as well (architecture and
                # parameter size).  For now a depthwise conv is assumed never
                # to be followed directly by another depthwise conv
                # (2018-09-19 CKH).
                # The successor is known to be a Conv2d in this branch, so the
                # '.Conv' suffix is always needed; no lookup by normalized
                # name (which may not be a key of `layers`) is required.
                successor_norm_module_name = normalize_module_name(successor) + '.Conv'

                if layers[successor].groups == layers[successor].in_channels:
                    append_module_directive(model, thinning_recipe, successor,
                                            key='in_channels', val=num_nnz_filters)
                    append_module_directive(model, thinning_recipe, successor,
                                            key='out_channels', val=num_nnz_filters)
                    # NOTE: this updates the live module while the recipe is
                    # still being built.
                    layers[successor].groups = num_nnz_filters
                    msglogger.debug(
                        "[recipe] {}: setting in_channels = {}".format(
                            successor, num_nnz_filters))

                    # Now remove channels from the weights tensor of the successor conv
                    append_param_directive(
                        thinning_recipe,
                        denormalize_module_name(model, successor) + '.weight',
                        (0, indices))

                    # For a depthwise 3x3 that follows a 1x1 conv: once output
                    # channels of the 1x1 are pruned, the depthwise conv needs
                    # (1) its input channels, (2) its output channels and
                    # (3) its bias pruned, all with the same mask as the 1x1
                    # (2018-09-20 CKH).
                    if layers[successor].bias is not None:
                        # This convolution has bias coefficients
                        append_param_directive(thinning_recipe, successor + '.bias',
                                               (0, indices))

                    successors2 = sgraph.successors_f(
                        successor_norm_module_name, ['Conv', 'Gemm'])
                    # Same name conversion as for the first-level successors
                    successors2 = [
                        '.'.join(succs.split('.')[0:-1]) for succs in successors2
                    ]
                    successors2 = [
                        denormalize_module_name(model, name)
                        for name in successors2
                    ]
                    for successor2 in successors2:
                        if isinstance(layers[successor2], torch.nn.modules.Conv2d):
                            append_module_directive(model, thinning_recipe, successor2,
                                                    key='in_channels',
                                                    val=num_nnz_filters)
                            # Report the layer that is actually modified here.
                            msglogger.debug(
                                "[recipe] {}: setting in_channels = {}".format(
                                    successor2, num_nnz_filters))
                            append_param_directive(
                                thinning_recipe,
                                denormalize_module_name(model, successor2) + '.weight',
                                (1, indices))
                else:
                    # For each of the convolutional layers that follow, we
                    # have to reduce the number of input channels.
                    append_module_directive(model, thinning_recipe, successor,
                                            key='in_channels', val=num_nnz_filters)
                    msglogger.debug(
                        "[recipe] {}: setting in_channels = {}".format(
                            successor, num_nnz_filters))

                    # Now remove channels from the weights tensor of the successor conv
                    append_param_directive(
                        thinning_recipe,
                        denormalize_module_name(model, successor) + '.weight',
                        (1, indices))

            elif isinstance(layers[successor], torch.nn.modules.Linear):
                # If a Linear (Fully-Connected) layer follows, we need to
                # update its in_features member
                fm_size = layers[successor].in_features // layers[layer_name].out_channels
                in_features = fm_size * num_nnz_filters
                append_module_directive(model, thinning_recipe, successor,
                                        key='in_features', val=in_features)
                msglogger.debug("[recipe] {}: setting in_features = {}".format(
                    successor, in_features))

                # Now remove channels from the weights tensor of the successor
                # FC layer.  This is a bit tricky: the 2D weights are viewed
                # as 4D so whole feature maps can be selected; the feature map
                # is taken to be square.
                fm_height = fm_width = int(math.sqrt(fm_size))
                view_4D = (layers[successor].out_features,
                           layers[layer_name].out_channels, fm_height, fm_width)
                view_2D = (layers[successor].out_features, in_features)
                append_param_directive(
                    thinning_recipe,
                    denormalize_module_name(model, successor) + '.weight',
                    (1, indices, view_4D, view_2D))

        # Now handle the BatchNormalization layer that follows the convolution
        # NOTE(review): this lookup passes the name without the '.Conv' suffix
        # used above -- confirm that is what successors_f expects here.
        bn_layers = sgraph.successors_f(normalize_module_name(layer_name),
                                        ['BatchNormalization'])
        if len(bn_layers) > 0:
            assert len(bn_layers) == 1
            # Thinning of the BN layer that follows the convolution
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(thinning_recipe, layers, bn_layer_name,
                        len_thin_features=num_nnz_filters, thin_features=indices)

    return thinning_recipe
def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
    """Build a thinning recipe that removes all-zero filters from Conv layers.

    Every 4D parameter of `model` is inspected.  When some of its filters
    (slices along dimension 0) sum to zero in absolute value, directives are
    recorded to shrink the layer's `out_channels`, its weights and bias, the
    input side of the Conv / Linear successors reported by `sgraph`, and the
    single BatchNormalization successor, if there is one.

    Args:
        sgraph: graph object queried through `successors_f`.
        model: the model whose modules and parameters are examined.
        zeros_mask_dict: not used by this function.

    Returns:
        The populated ThinningRecipe.

    Raises:
        ValueError: if a 4D parameter has no non-zero filter at all.
    """
    msglogger.info("Invoking create_thinning_recipe_filters")

    recipe = ThinningRecipe(modules={}, parameters={})
    modules_by_name = dict(model.named_modules())

    for param_name, param in model.named_parameters():
        # Only 4D tensors (convolution weights) are candidates.
        if param.dim() != 4:
            continue

        # Flatten each filter so that all-zero filters can be detected.
        flat_filters = param.view(param.size(0), -1)
        total_filters = flat_filters.size()[0]
        nnz_filters = torch.nonzero(flat_filters.abs().sum(dim=1))
        nnz_count = nnz_filters.nelement()
        if nnz_count == 0:
            raise ValueError(
                "Trying to set zero filters for parameter %s is not allowed" % param_name)

        # Nothing to remove when every filter is non-zero.
        if total_filters <= nnz_count:
            msglogger.debug("SKipping {} shape={}".format(
                param_name_2_layer_name(param_name), param.shape))
            continue

        msglogger.info("In tensor %s found %d/%d zero filters", param_name,
                       total_filters - nnz_count, total_filters)

        # Filters are being removed, so the layer produces fewer OFMs.
        layer_name = param_name_2_layer_name(param_name)
        conv = modules_by_name[layer_name]
        assert isinstance(conv, torch.nn.modules.Conv2d)
        append_module_directive(model, recipe, layer_name,
                                key='out_channels', val=nnz_count)

        # Keep only the non-zero filters of the weights (and bias, if any).
        indices = nnz_filters.data.squeeze()
        append_param_directive(recipe, param_name, (0, indices))
        if conv.bias is not None:
            append_param_directive(recipe, layer_name + '.bias', (0, indices))

        normalized_name = normalize_module_name(layer_name)

        # Conv / FC (GEMM) layers that follow, converted back to the model's
        # own naming scheme (relevant when DataParallel is used).
        followers = [
            denormalize_module_name(model, name)
            for name in sgraph.successors_f(normalized_name, ['Conv', 'Gemm'])
        ]

        for successor in followers:
            successor_module = modules_by_name[successor]

            if isinstance(successor_module, torch.nn.modules.Conv2d):
                # A following convolution consumes fewer input channels.
                append_module_directive(model, recipe, successor,
                                        key='in_channels', val=nnz_count)
                msglogger.debug("[recipe] {}: setting in_channels = {}".format(
                    successor, nnz_count))

                # Drop the matching channels from the successor's weights.
                weight_name = denormalize_module_name(model, successor) + '.weight'
                append_param_directive(recipe, weight_name, (1, indices))

            elif isinstance(successor_module, torch.nn.modules.Linear):
                # A following Linear layer needs its in_features recomputed
                # from the per-channel feature map size.
                fm_size = successor_module.in_features // conv.out_channels
                in_features = fm_size * nnz_count
                append_module_directive(model, recipe, successor,
                                        key='in_features', val=in_features)
                msglogger.debug("[recipe] {}: setting in_features = {}".format(
                    successor, in_features))

                # The 2D FC weights are viewed as 4D so that whole feature
                # maps can be selected; the feature map is taken to be square.
                fm_height = fm_width = int(math.sqrt(fm_size))
                view_4D = (successor_module.out_features, conv.out_channels,
                           fm_height, fm_width)
                view_2D = (successor_module.out_features, in_features)
                weight_name = denormalize_module_name(model, successor) + '.weight'
                append_param_directive(recipe, weight_name,
                                       (1, indices, view_4D, view_2D))

        # The BatchNormalization layer following the convolution is thinned too.
        bn_layers = sgraph.successors_f(normalized_name, ['BatchNormalization'])
        if bn_layers:
            assert len(bn_layers) == 1
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(recipe, modules_by_name, bn_layer_name,
                        len_thin_features=nnz_count, thin_features=indices)

    return recipe
def find_op(self, lost_op_name):
    """Return the entry of `self.ops` for the normalized name, or None if absent."""
    normalized_name = distiller.normalize_module_name(lost_op_name)
    return self.ops.get(normalized_name)
def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
    """Build a thinning recipe that removes all-zero channels from Conv layers.

    Every 4D parameter of `model` is inspected.  When fewer non-zero channels
    are found than the tensor has along dimension 1, directives are recorded
    to shrink the layer's `in_channels`, the parameter itself, the
    `out_channels`, weights and bias of the Conv predecessors reported by
    `sgraph`, and every BatchNormalization predecessor.

    Args:
        sgraph: graph object queried through `predecessors_f`.
        model: the model whose modules and parameters are examined.
        zeros_mask_dict: not used by this function.

    Returns:
        The populated ThinningRecipe.

    Raises:
        ValueError: if a 4D parameter has no non-zero channel at all.
    """
    msglogger.info("Invoking create_thinning_recipe_channels")

    recipe = ThinningRecipe(modules={}, parameters={})
    modules_by_name = dict(model.named_modules())

    for param_name, param in model.named_parameters():
        # Only 4D tensors (convolution weights) are candidates.
        if param.dim() != 4:
            continue

        total_channels = param.size(1)
        nnz_channels = find_nonzero_channels(param, param_name)
        nnz_count = nnz_channels.nelement()
        if nnz_count == 0:
            raise ValueError(
                "Trying to set zero channels for parameter %s is not allowed" % param_name)

        # Nothing to remove when every channel is non-zero.
        if total_channels <= nnz_count:
            continue

        # Channels are being removed, so the layer consumes fewer IFMs.
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(modules_by_name[layer_name], torch.nn.modules.Conv2d)
        append_module_directive(model, recipe, layer_name,
                                key='in_channels', val=nnz_count)

        # Keep only the non-zero channels of this weights tensor.
        indices = nnz_channels.data.squeeze()
        append_param_directive(recipe, param_name, (1, indices))

        normalized_name = normalize_module_name(layer_name)

        # Conv layers feeding this one; their names are kept normalized here
        # and denormalized where a parameter or module lookup needs it.
        conv_predecessors = [
            normalize_module_name(name)
            for name in sgraph.predecessors_f(normalized_name, ['Conv'])
        ]
        if not conv_predecessors:
            msglogger.info(
                "Could not find predecessors for name={} normal={} {}".format(
                    layer_name, normalized_name,
                    denormalize_module_name(model, layer_name)))

        for predecessor in conv_predecessors:
            # Each preceding convolution produces fewer output channels...
            append_module_directive(model, recipe, predecessor,
                                    key='out_channels', val=nnz_count)

            # ...and loses the matching filters from its weights and bias.
            denormalized = denormalize_module_name(model, predecessor)
            append_param_directive(recipe, denormalized + '.weight', (0, indices))
            if modules_by_name[denormalized].bias is not None:
                append_param_directive(recipe, denormalized + '.bias', (0, indices))

        # Every BatchNormalization predecessor is thinned in the same way;
        # more than one is tolerated.
        for bn_layer in sgraph.predecessors_f(normalized_name, ['BatchNormalization']):
            bn_layer_name = denormalize_module_name(model, bn_layer)
            msglogger.debug(
                "[recipe] {}: predecessor BN module = {}".format(
                    layer_name, bn_layer_name))
            append_bn_thinning_directive(
                recipe, modules_by_name, bn_layer_name,
                len_thin_features=nnz_count, thin_features=indices)

    msglogger.debug(recipe)
    return recipe