def get_channels_l1_norm(model, layer, node_indices=None):
    """Score every output channel of `layer` by the L1 norm of its kernel.

    Args:
        model: A Keras model.
        layer: The layer (or the name of the layer) whose channels will be
            scored.
        node_indices(list[int]): (optional) A list of node indices. They are
            only validated here; the result is computed from the layer's
            weights and does not depend on the selected nodes.

    Returns:
        A 1-D numpy array holding, for each index of the last kernel axis,
        the sum of the absolute kernel values.

    Raises:
        ValueError: if `layer` is not in `model`, or if `node_indices`
            contains duplicates or nodes that are not in `model`.
    """
    if isinstance(layer, str):
        layer = model.get_layer(name=layer)
    # Check that layer is in the model
    if layer not in model.layers:
        raise ValueError('layer is not a valid Layer in model.')
    layer_node_indices = utils.find_nodes_in_model(model, layer)
    print('layer:', layer, layer_node_indices)
    # If no nodes are specified, all of the layer's inbound nodes which are
    # in model are selected.
    if not node_indices:
        node_indices = layer_node_indices
    # Check for duplicate node indices
    elif len(node_indices) != len(set(node_indices)):
        raise ValueError('`node_indices` contains duplicate values.')
    # Check that all of the selected nodes are in the layer
    elif not set(node_indices).issubset(layer_node_indices):
        raise ValueError('One or more nodes specified by `layer` and '
                         '`node_indices` are not in `model`.')

    kernel = layer.get_weights()[0]
    # Keras lays convolution kernels out as kernel_size + (in, out) for both
    # data formats, so the output channels are always the last axis. The
    # kernel must therefore NOT be transposed for 'channels_first'; doing so
    # would sum over the wrong axes.
    # Reducing over every axis except the last also supports kernels that
    # are not 4-D (e.g. Dense or Conv1D).
    reduce_axes = tuple(range(kernel.ndim - 1))
    importances = np.abs(kernel).sum(axis=reduce_axes)
    print(kernel.shape, importances.shape)
    return importances
def get_channels_apoz_importance(model, layer, x_val, node_indices=None):
    """Compute the fraction of zero activations of each channel of `layer`.

    Args:
        model: A Keras model.
        layer: The layer (or the name of the layer) to evaluate.
        x_val: Validation inputs. Objects exposing `__iter__` are fed through
            `Model.predict`; anything else is fed through a backend function.
        node_indices(list[int]): (optional) A list of node indices. Defaults
            to all nodes of `layer` that are in `model`.

    Returns:
        The result of `utils.MeanCalculator.calculate()` after the per-channel
        zero indicators of every selected node have been added.

    Raises:
        ValueError: if `layer` is not in `model`, or if `node_indices`
            contains duplicates or nodes that are not in `model`.
    """
    if isinstance(layer, str):
        layer = model.get_layer(name=layer)
    if layer not in model.layers:
        raise ValueError('layer is not a valid Layer in model.')

    layer_node_indices = utils.find_nodes_in_model(model, layer)
    if node_indices:
        # Explicit selection: reject duplicates first, then foreign nodes.
        if len(set(node_indices)) != len(node_indices):
            raise ValueError('`node_indices` contains duplicate values.')
        if not set(node_indices).issubset(layer_node_indices):
            raise ValueError('One or more nodes specified by `layer` and '
                             '`node_indices` are not in `model`.')
    else:
        # Nothing selected: use every node of the layer that is in the model.
        node_indices = layer_node_indices

    channels_first = (
        getattr(layer, 'data_format', 'channels_last') == 'channels_first')
    use_predict = hasattr(x_val, "__iter__")

    zero_stats = utils.MeanCalculator(sum_axis=0)
    print('layer:', layer, layer_node_indices, node_indices)
    for node_index in node_indices:
        act_layer, act_index = utils.find_activation_layer(layer, node_index)
        print('act layer', act_layer, act_index)
        act_output = act_layer.get_output_at(act_index)

        # Forward pass up to the activation that follows this node.
        if use_predict:
            temp_model = Model(model.inputs, act_output)
            print('before: act output', act_output)
            a = temp_model.predict(x_val)
            print('after:', layer, a.shape)
        else:
            # NOTE(review): `single_element` is used unqualified here while
            # other functions call `utils.single_element` - confirm import.
            get_activations = K.function(
                [single_element(model.inputs), K.learning_phase()],
                [act_output])
            a = get_activations([x_val, 0])[0]

        # Move channels to the last axis, then collapse everything else.
        if channels_first:
            a = np.swapaxes(a, 1, -1)
        per_channel = np.reshape(a, [-1, a.shape[-1]])
        zero_stats.add((per_channel == 0).astype(int))
    return zero_stats.calculate()
def get_apoz(model, layer, x_val, node_indices=None, batch_size=1):
    """Compute the Average Percentage of Zeros (APoZ) of a layer's channels.

    The APoZ a.k.a. (A)verage (P)ercentage (o)f activations equal to (Z)ero,
    is a metric for the usefulness of a channel defined in this paper:
    "Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient
    Deep Architectures" - [Hu et al. (2016)][]

    If node_indices are not specified and the layer is shared within the model
    the APoZ will be calculated over all instances of the shared layer.

    Args:
        model: A Keras model.
        layer: The layer (or the name of the layer) whose channels will be
            evaluated for pruning.
        x_val: The input of the validation set. This will be used to calculate
            the activations of the layer of interest. A numpy array is run
            through `Model.predict`; any other object exposing `__iter__` is
            treated as a generator with an `n` attribute; anything else is fed
            through a backend function.
        node_indices(list[int]): (optional) A list of node indices.
        batch_size(int): batch size used for numpy input, and divisor used to
            derive the number of generator steps from `x_val.n`.

    Returns:
        List of the APoZ values for each channel in the layer.

    Raises:
        ValueError: if `layer` is not in `model`, or if `node_indices`
            contains duplicates or nodes that are not in `model`.
    """
    if isinstance(layer, str):
        layer = model.get_layer(name=layer)
    # Check that layer is in the model
    if layer not in model.layers:
        raise ValueError('layer is not a valid Layer in model.')
    layer_node_indices = utils.find_nodes_in_model(model, layer)
    # If no nodes are specified, all of the layer's inbound nodes which are
    # in model are selected.
    if not node_indices:
        node_indices = layer_node_indices
    # Check for duplicate node indices
    elif len(node_indices) != len(set(node_indices)):
        raise ValueError('`node_indices` contains duplicate values.')
    # Check that all of the selected nodes are in the layer
    elif not set(node_indices).issubset(layer_node_indices):
        raise ValueError('One or more nodes specified by `layer` and '
                         '`node_indices` are not in `model`.')

    data_format = getattr(layer, 'data_format', 'channels_last')
    # Perform the forward pass and get the activations of the layer.
    mean_calculator = utils.MeanCalculator(sum_axis=0)
    for node_index in node_indices:
        act_layer, act_index = utils.find_activation_layer(layer, node_index)
        act_output = act_layer.get_output_at(act_index)
        # Get activations
        if isinstance(x_val, np.ndarray):
            # A plain array is not a generator: it must go through
            # `predict`, which batches the array itself and keeps every
            # sample (including a trailing partial batch).
            temp_model = Model(model.inputs, act_output)
            a = temp_model.predict(x_val, batch_size=batch_size)
        elif hasattr(x_val, "__iter__"):
            temp_model = Model(model.inputs, act_output)
            # NOTE(review): floor division skips a trailing partial batch and
            # assumes `batch_size` matches the generator's own batch size.
            a = temp_model.predict_generator(x_val, x_val.n // batch_size)
        else:
            get_activations = k.function(
                [utils.single_element(model.inputs), k.learning_phase()],
                [act_output])
            a = get_activations([x_val, 0])[0]
        # Ensure that the channels axis is last
        if data_format == 'channels_first':
            a = np.swapaxes(a, 1, -1)
        # Flatten all except channels axis
        activations = np.reshape(a, [-1, a.shape[-1]])
        zeros = (activations == 0).astype(int)
        mean_calculator.add(zeros)
    return mean_calculator.calculate()
def add_job(self, job, layer, channels=None, new_layer=None):
    """Adds a job for the Surgeon to perform on the model.

    Job options are:
    'delete_layer': delete `layer` from the model
                    required keyword arguments: None
    'insert_layer': insert `new_layer` before `layer`
                    required keyword arguments: `new_layer`
    'replace_layer': replace `layer` with `new_layer`
                     required keyword arguments: `new_layer`
    'delete_channels': delete `channels` from `layer`
                       required keyword arguments: `channels`

    Jobs can be added in any order. They will be performed in order of
    decreasing network depth.
    A maximum of one job can be performed per node. The job is always applied
    to every inbound node of `layer` that is part of the model.

    Args:
        job(string): job identifier. One of `Surgeon.valid_jobs`.
        layer(Layer): A layer from `model` to be modified.
        channels(list[int]): A list of channels used for the job.
                             Used in `delete_channels`.
        new_layer(Layer): A new layer used for the job. Used in
                          `insert_layer` and `replace_layer`.

    Raises:
        ValueError: if `layer` is not in the model, if `job` is not a
            recognised job, or if one of the layer's nodes already has a job
            assigned to it.
    """
    # If the model has been copied, identify `layer` in the copied model.
    if self._copy:
        layer = self.model.get_layer(layer.name)
    # Check that layer is in the model
    if layer not in self.model.layers:
        raise ValueError('layer is not a valid Layer in model.')

    # All of the layer's inbound nodes which are in model are selected.
    node_indices = utils.find_nodes_in_model(self.model, layer)

    # Select the modification function and any keyword arguments.
    kwargs = {}
    if job == 'delete_channels':
        kwargs['channels'] = channels
        mod_func = self._delete_channels
    elif job == 'delete_layer':
        mod_func = self._delete_layer
    elif job == 'insert_layer':
        kwargs['new_layer'] = new_layer
        mod_func = self._insert_layer
    elif job == 'replace_layer':
        kwargs['new_layer'] = new_layer
        mod_func = self._replace_layer
    else:
        # Build ONE message string; passing two arguments to ValueError
        # would render the error as a tuple instead of readable text.
        raise ValueError(
            '{0} is not a recognised job. Valid jobs are:\n- {1}'.format(
                job, '\n- '.join(self.valid_jobs)))

    # Get nodes to be operated on for this job
    job_nodes = [layer._inbound_nodes[node_index]
                 for node_index in node_indices]
    # Check that the nodes do not already have jobs assigned to them.
    if set(job_nodes).intersection(self.nodes):
        raise ValueError('Cannot apply several jobs to the same node.')

    # Add the modification function and keyword arguments to the
    # self._mod_func_map and _kwargs_map dictionaries for later retrieval.
    for node in job_nodes:
        self._mod_func_map[node] = mod_func
        self._kwargs_map[node] = kwargs
    self.nodes.extend(job_nodes)
def get_channels_importance_with_gradient(model, layer, x_val, y,
                                          node_indices=None, batch_size=32):
    """Score channels by gradient-weighted pairwise activation distances.

    For every batch and every channel, the pairwise distances between the
    samples' flattened activations are multiplied by the per-sample sum of
    absolute loss gradients for that channel; the absolute values of the
    products are summed and accumulated over all batches.

    Args:
        model: A compiled Keras model (its `total_loss`, `sample_weights` and
            `targets` are used).
        layer: The layer (or the name of the layer) to evaluate.
        x_val: Validation inputs, sliced along the first axis into batches.
        y: Targets matching `x_val`, sliced the same way.
        node_indices(list[int]): (optional) A list of node indices. Only
            validated; `layer.output` is always used.
        batch_size(int): number of samples evaluated per backend call.

    Returns:
        A 1-D numpy array with one accumulated score per channel, or an empty
        list when `x_val` does not expose `__iter__`.
        NOTE(review): the extent of the `__iter__` guard is assumed - confirm.

    Raises:
        ValueError: if `layer` is not in `model`, or if `node_indices`
            contains duplicates or nodes that are not in `model`.
    """
    if isinstance(layer, str):
        layer = model.get_layer(name=layer)
    # Check that layer is in the model
    if layer not in model.layers:
        raise ValueError('layer is not a valid Layer in model.')
    layer_node_indices = utils.find_nodes_in_model(model, layer)
    print('layer:', layer, layer_node_indices)
    # If no nodes are specified, all of the layer's inbound nodes which are
    # in model are selected.
    if not node_indices:
        node_indices = layer_node_indices
    # Check for duplicate node indices
    elif len(node_indices) != len(set(node_indices)):
        raise ValueError('`node_indices` contains duplicate values.')
    # Check that all of the selected nodes are in the layer
    elif not set(node_indices).issubset(layer_node_indices):
        raise ValueError('One or more nodes specified by `layer` and '
                         '`node_indices` are not in `model`.')

    data_format = getattr(layer, 'data_format', 'channels_last')
    importances = []
    print('layer:', layer, layer_node_indices, node_indices)
    if len(node_indices) > 1:
        # Only `layer.output` is evaluated, so shared layers are unsupported.
        print('ERROR!!!!!!!!!!!!!!!!!!!!!!!!')

    if hasattr(x_val, "__iter__"):
        grads = K.gradients(model.total_loss, layer.output)[0]
        input_tensors = [
            model.inputs[0],  # input data
            model.sample_weights[0],  # how much to weight each sample by
            model.targets[0],  # labels
            K.learning_phase(),  # train or test mode
        ]
        # Bring the channel axis to the front, followed by the batch axis.
        if K.image_data_format() == 'channels_first':
            dimensions = (1, 0, 2, 3)
        else:
            dimensions = (3, 0, 1, 2)
        acts = K.permute_dimensions(layer.output, dimensions)
        grads = K.permute_dimensions(grads, dimensions)
        grads_shape = K.int_shape(grads)
        # (channels, batch, flattened remaining axes)
        flat_shape = (grads_shape[0], -1, grads_shape[2] * grads_shape[3])
        grads = K.sum(K.abs(K.reshape(grads, flat_shape)), axis=2)
        acts = K.reshape(acts, flat_shape)
        func = K.function(input_tensors, [acts, grads])
        print('before: act output', layer.output)

        importances = None
        for start in range(0, x_val.shape[0], batch_size):
            x_part = x_val[start:start + batch_size]
            y_part = y[start:start + batch_size]
            # Uniform sample weights, learning phase 0 (test mode).
            a, g = func([x_part, np.ones(x_part.shape[0]), y_part, 0])
            num_channels = a.shape[0]
            if importances is None:
                importances = np.zeros(num_channels)
            for channel in range(num_channels):
                activations = a[channel]
                activations = np.reshape(activations,
                                         [activations.shape[0], -1])
                pair_dist = pairwise_distances(activations)
                weighted_pair_dist = pair_dist * np.transpose(g[channel])
                importances[channel] += abs(weighted_pair_dist).sum()
        print('after:', importances.shape, layer.output.shape, data_format)
    return importances
def get_channels_gradients(model, layer, x_val, y, node_indices=None,
                           batch_size=32):
    """Score channels by the magnitude of activation * loss-gradient.

    For every sample the element-wise product of `layer.output` and the
    gradient of the model loss with respect to it is summed over the
    non-channel, non-batch axes; the absolute values are then summed over the
    samples and accumulated over all batches.

    Args:
        model: A compiled Keras model (its `total_loss`, `sample_weights` and
            `targets` are used).
        layer: The layer (or the name of the layer) to evaluate.
        x_val: Validation inputs, sliced along the first axis into batches.
        y: Targets matching `x_val`, sliced the same way.
        node_indices(list[int]): (optional) A list of node indices. Only
            validated; `layer.output` is always used.
        batch_size(int): number of samples evaluated per backend call.

    Returns:
        A 1-D numpy array with one accumulated score per channel, or an empty
        list when `x_val` does not expose `__iter__`.
        NOTE(review): the extent of the `__iter__` guard is assumed - confirm.

    Raises:
        ValueError: if `layer` is not in `model`, or if `node_indices`
            contains duplicates or nodes that are not in `model`.
    """
    if isinstance(layer, str):
        layer = model.get_layer(name=layer)
    # Check that layer is in the model
    if layer not in model.layers:
        raise ValueError('layer is not a valid Layer in model.')
    layer_node_indices = utils.find_nodes_in_model(model, layer)
    print('layer:', layer, layer_node_indices)
    # If no nodes are specified, all of the layer's inbound nodes which are
    # in model are selected.
    if not node_indices:
        node_indices = layer_node_indices
    # Check for duplicate node indices
    elif len(node_indices) != len(set(node_indices)):
        raise ValueError('`node_indices` contains duplicate values.')
    # Check that all of the selected nodes are in the layer
    elif not set(node_indices).issubset(layer_node_indices):
        raise ValueError('One or more nodes specified by `layer` and '
                         '`node_indices` are not in `model`.')

    data_format = getattr(layer, 'data_format', 'channels_last')
    importances = []
    print('layer:', layer, layer_node_indices, node_indices)
    if len(node_indices) > 1:
        # Only `layer.output` is evaluated, so shared layers are unsupported.
        print('ERROR!!!!!!!!!!!!!!!!!!!!!!!!')

    if hasattr(x_val, "__iter__"):
        grads = K.gradients(model.total_loss, layer.output)[0]
        input_tensors = [
            model.inputs[0],  # input data
            model.sample_weights[0],  # how much to weight each sample by
            model.targets[0],  # labels
            K.learning_phase(),  # train or test mode
        ]
        mul_a_grads_tensor = layer.output * grads
        # Bring the channel axis to the front, followed by the batch axis.
        if K.image_data_format() == 'channels_first':
            score = K.permute_dimensions(mul_a_grads_tensor, (1, 0, 2, 3))
        else:
            score = K.permute_dimensions(mul_a_grads_tensor, (3, 0, 1, 2))
        score_shape = K.int_shape(score)
        # (channels, batch, flattened remaining axes)
        score = K.reshape(
            score, (score_shape[0], -1, score_shape[2] * score_shape[3]))
        score = K.sum(score, axis=2)  # per channel and sample
        score = K.abs(score)
        score = K.sum(score, axis=1)  # per channel
        func = K.function(input_tensors, [score])
        print('before: act output', layer.output)

        importances = None
        for start in range(0, x_val.shape[0], batch_size):
            x_part = x_val[start:start + batch_size]
            y_part = y[start:start + batch_size]
            # Uniform sample weights, learning phase 0 (test mode).
            q_part = func([x_part, np.ones(x_part.shape[0]), y_part, 0])[0]
            if importances is None:
                importances = q_part.copy()
            else:
                importances += q_part
        print('after:', importances.shape, layer.output.shape, data_format)
    return importances
def get_channels_loss(model, layer, x_val, y, node_indices=None,
                      batch_size=128):
    """Score channels by the model loss obtained when a channel is zeroed.

    The activations of `layer` are computed once. Then, for each channel, a
    copy with that channel set to zero is fed back into the model in place of
    `layer.output`, and the mean negative log of the output entry selected by
    the label is recorded.

    Args:
        model: A Keras model.
        layer: The layer (or the name of the layer) to evaluate.
        x_val: Validation inputs (array-like, sliced along the first axis).
        y: Labels matching `x_val`; compared against `argmax` of the model
            output and against output column indices, so presumably integer
            class ids - confirm with callers.
        node_indices(list[int]): (optional) A list of node indices. Only
            validated; `layer.output` is always used.
        batch_size(int): number of samples evaluated per backend call.

    Returns:
        A 1-D numpy array with one loss value per channel (empty when `x_val`
        does not expose `__iter__`).
        NOTE(review): the extent of the `__iter__` guard is assumed - confirm.

    Raises:
        ValueError: if `layer` is not in `model`, or if `node_indices`
            contains duplicates or nodes that are not in `model`.
    """
    if isinstance(layer, str):
        layer = model.get_layer(name=layer)
    # Check that layer is in the model
    if layer not in model.layers:
        raise ValueError('layer is not a valid Layer in model.')
    layer_node_indices = utils.find_nodes_in_model(model, layer)
    print('layer:', layer, layer_node_indices)
    # If no nodes are specified, all of the layer's inbound nodes which are
    # in model are selected.
    if not node_indices:
        node_indices = layer_node_indices
    # Check for duplicate node indices
    elif len(node_indices) != len(set(node_indices)):
        raise ValueError('`node_indices` contains duplicate values.')
    # Check that all of the selected nodes are in the layer
    elif not set(node_indices).issubset(layer_node_indices):
        raise ValueError('One or more nodes specified by `layer` and '
                         '`node_indices` are not in `model`.')

    data_format = getattr(layer, 'data_format', 'channels_last')
    importances = []
    print('layer:', layer, layer_node_indices, node_indices)
    if len(node_indices) > 1:
        # Only `layer.output` is evaluated, so shared layers are unsupported.
        print('ERROR!!!!!!!!!!!!!!!!!!!!!!!!')

    if hasattr(x_val, "__iter__"):
        temp_model = Model(model.inputs, layer.output)
        # Feeding `layer.output` overrides the layer's activations while the
        # rest of the graph is still driven by the real model input.
        func = K.function([layer.output, model.input], [model.output])
        print('before: act output', layer.output)
        a = temp_model.predict(x_val)
        print(a.shape, layer.output.shape)
        print('after:', layer, a.shape, data_format)

        # The activations are fed back unchanged, so the channel axis depends
        # on the data format: axis 1 for 'channels_first', last otherwise.
        channels_first = data_format == 'channels_first'
        num_channels = a.shape[1] if channels_first else a.shape[-1]
        num_samples = a.shape[0]
        for channel in range(num_channels):
            if channels_first:
                channel_slice = (slice(None), channel)
            else:
                channel_slice = (Ellipsis, channel)
            a_new = a.copy()
            mean_activation = abs(a_new[channel_slice]).mean()
            a_new[channel_slice] = 0

            acc = 0
            loss = 0
            for start in range(0, num_samples, batch_size):
                stop = start + batch_size
                act_batch = a_new[start:stop]
                y_batch = y[start:stop]
                probs = func([act_batch, x_val[start:stop]])[0]
                acc += (np.argmax(probs, axis=1) == y_batch).sum()
                # Boolean mask selecting, per row, the column equal to the
                # sample's label.
                true_class = (np.arange(probs.shape[1]) ==
                              y_batch.reshape(act_batch.shape[0], 1))
                loss -= np.log(probs[true_class]).sum()
            acc /= num_samples
            loss /= num_samples
            print(channel, acc, loss, mean_activation)
            importances.append(loss)
    importances = np.array(importances)
    return importances
def get_channels_importance(model, layer, x_val, y, node_indices=None):
    """Score channels by how well pairwise distances match class medians.

    For each channel the pairwise distances between the samples' flattened
    activations are compared with the median distance of the corresponding
    class-pair block; the score is the negated sum of
    `(distance - median)**2 / median` over all sample pairs.

    Args:
        model: A Keras model.
        layer: The layer (or the name of the layer) to evaluate.
        x_val: Validation inputs fed to `Model.predict`.
        y: Class labels matching `x_val`. The distance matrix is reshaped
            into equally sized class blocks, so the samples are assumed to be
            grouped by class with the same number of samples per class -
            TODO confirm with callers.
        node_indices(list[int]): (optional) A list of node indices. Only
            validated; `layer.output` is always used.

    Returns:
        A 1-D numpy array with one score per channel (empty when `x_val` does
        not expose `__iter__`).
        NOTE(review): the extent of the `__iter__` guard is assumed - confirm.

    Raises:
        ValueError: if `layer` is not in `model`, or if `node_indices`
            contains duplicates or nodes that are not in `model`.
    """
    if isinstance(layer, str):
        layer = model.get_layer(name=layer)
    # Check that layer is in the model
    if layer not in model.layers:
        raise ValueError('layer is not a valid Layer in model.')
    layer_node_indices = utils.find_nodes_in_model(model, layer)
    print('layer:', layer, layer_node_indices)
    # If no nodes are specified, all of the layer's inbound nodes which are
    # in model are selected.
    if not node_indices:
        node_indices = layer_node_indices
    # Check for duplicate node indices
    elif len(node_indices) != len(set(node_indices)):
        raise ValueError('`node_indices` contains duplicate values.')
    # Check that all of the selected nodes are in the layer
    elif not set(node_indices).issubset(layer_node_indices):
        raise ValueError('One or more nodes specified by `layer` and '
                         '`node_indices` are not in `model`.')

    data_format = getattr(layer, 'data_format', 'channels_last')
    importances = []
    print('layer:', layer, layer_node_indices, node_indices)
    if len(node_indices) > 1:
        # Only `layer.output` is evaluated, so shared layers are unsupported.
        print('ERROR!!!!!!!!!!!!!!!!!!!!!!!!')

    if hasattr(x_val, "__iter__"):
        temp_model = Model(model.inputs, layer.output)
        print('before: act output', layer.output)
        a = temp_model.predict(x_val)
        # Ensure that the channels axis is last
        if data_format == 'channels_first':
            a = np.swapaxes(a, 1, -1)
        print('after:', layer, a.shape, data_format)

        # The class layout depends only on `y`, so compute it once instead of
        # once per channel.
        num_classes = np.unique(y).shape[0]
        delta = len(y) // num_classes  # samples per class

        for channel in range(a.shape[-1]):
            # Flatten all except the sample axis
            activations = a[..., channel]
            activations = np.reshape(activations, [activations.shape[0], -1])
            pair_dist = pairwise_distances(activations)
            # View the distance matrix as class-by-class blocks and take the
            # median distance of every block.
            pdr = pair_dist.reshape(num_classes, delta, num_classes, delta)
            class_dists = np.median(pdr, axis=(1, 3))
            # Expand the block medians back to one value per sample pair.
            instance_dists = np.repeat(
                np.repeat(class_dists, delta, axis=0), delta, axis=1)
            importance = -(((pair_dist - instance_dists)**2) /
                           instance_dists).sum()
            importances.append(importance)
    importances = np.array(importances)
    return importances
def get_output_sum(model, layer, x_val, node_indices=None, steps=None):
    """Sum the outputs produced for `x_val` at the selected nodes of `layer`.

    Args:
        model: A Keras model.
        layer: The layer (or the name of the layer) whose channels will be
            evaluated for pruning.
        x_val: The input of the validation set. A generator object is run
            through `predict_generator_intermediate` on the layer's own
            output; any other input is fed through a backend function on the
            output of the layer's activation layer.
        node_indices(list[int]): (optional) A list of node indices.
        steps: number of steps for a generator

    Returns:
        The accumulated total over all selected nodes.
        NOTE(review): every axis, including the channel axis, is reduced, so
        each node contributes a single scalar rather than one value per
        kernel - confirm this is intended.

    Raises:
        ValueError: if `layer` is not in `model`, or if `node_indices`
            contains duplicates or nodes that are not in `model`.
    """
    if isinstance(layer, str):
        layer = model.get_layer(name=layer)
    if layer not in model.layers:
        raise ValueError('layer is not a valid Layer in model.')

    layer_node_indices = utils.find_nodes_in_model(model, layer)
    if node_indices:
        # Explicit selection: reject duplicates first, then foreign nodes.
        if len(set(node_indices)) != len(node_indices):
            raise ValueError('`node_indices` contains duplicate values.')
        if not set(node_indices).issubset(layer_node_indices):
            raise ValueError('One or more nodes specified by `layer` and '
                             '`node_indices` are not in `model`.')
    else:
        # Nothing selected: use every node of the layer that is in the model.
        node_indices = layer_node_indices

    channels_first = (
        getattr(layer, 'data_format', 'channels_last') == 'channels_first')
    from_generator = isinstance(x_val, types.GeneratorType)

    total_sum = None
    for node_index in node_indices:
        act_layer, act_index = utils.find_activation_layer(layer, node_index)

        # Forward pass to obtain the outputs for this node.
        if from_generator:
            temp_model = Model(model.inputs, layer.get_output_at(node_index))
            a = temp_model.predict_generator_intermediate(x_val, steps=steps)
        else:
            get_activations = k.function(
                [utils.single_element(model.inputs), k.learning_phase()],
                [act_layer.get_output_at(act_index)])
            a = get_activations([x_val, 0])[0]

        # Ensure that the channels axis is last
        if channels_first:
            a = np.swapaxes(a, 1, -1)

        # Reduce the last axis first, then every remaining axis from the
        # innermost to the outermost.
        last_axis = len(a.shape) - 1
        total = np.sum(a, axis=last_axis)
        for axis in reversed(range(last_axis)):
            total = np.sum(total, axis=axis)

        if total_sum is None:
            total_sum = total
        else:
            total_sum += total
    return total_sum