def pruneInLayer(self, layer):
    """Prune the filters closest to the centroid of conv layer `layer`.

    Removes `self.Pruning.percentage` percent of the layer's output
    channels, chosen as the filters nearest the layer's central point.
    Updates `self.prunedfilters` by the number of filters deleted.

    :param layer: index of the target layer, counted over conv layers only
    """
    count = 0
    for m in self.Pruning.params.network.modules():
        if not isConvolutionLayer(m):
            continue
        if count != layer:
            count += 1
            continue
        # Number of filters to remove from this layer.
        layerAmount = int((self.Pruning.percentage * m.out_channels) / 100.0)
        centroid = self.findCentralPoint(m.weight.data)
        order = self.arrangeFiltersByDistance(m.weight.data, centroid)
        # Take the first `layerAmount` entries of the distance ordering —
        # presumably the filters closest to the centroid, i.e. the most
        # redundant ones (TODO confirm arrangeFiltersByDistance ordering).
        toDelete = [(layer, order[i][0]) for i in range(layerAmount)]
        # Delete from the highest filter index downwards so earlier
        # deletions do not shift the indices of later ones.
        toDelete.sort(key=lambda entry: entry[1], reverse=True)
        # Was a bare print(); use the prune logger for consistency with
        # the rest of this file's logging.
        self.logprune.info("Size: " + str(m.weight.data.size()))
        self.logprune.info("Layer " + str(layer) + " deleting: " +
                           str(toDelete))
        if self.Pruning.manner == 'soft':
            softPruneFilters(self.Pruning, self.Pruning.params.network,
                             toDelete)
        elif self.Pruning.manner == 'hard':
            hardPruneFilters(self.Pruning, toDelete)
        self.prunedfilters += len(toDelete)
        return
def __call__(self):
    """Run centroid-distance pruning over every prunable conv layer.

    Counts the conv layers, prunes each layer whose dependency flag is
    set via pruneInLayer, then logs the per-layer filter counts before
    and after pruning.
    """
    with torch.no_grad():
        self.values = []
        counts_before = []
        counts_after = []
        num_conv_layers = 0
        # First pass: record filter counts and how many conv layers exist.
        for module in self.Pruning.params.network.modules():
            if isConvolutionLayer(module):
                counts_before.append(module.out_channels)
                num_conv_layers += 1
        # Prune every layer that is allowed by its dependency entry.
        for layer_idx in range(num_conv_layers):
            if self.Pruning.dependencies[layer_idx][2] == True:
                self.logprune.info("working in layer " + str(layer_idx))
                self.pruneInLayer(layer_idx)
        # Second pass: tally the filters that survived pruning.
        remaining = 0
        for module in self.Pruning.params.network.modules():
            if isConvolutionLayer(module):
                counts_after.append(module.out_channels)
                remaining += module.out_channels
        deleteGrads(self.Pruning)
        self.logprune.info(
            "The final amount of filters after pruning is " + str(remaining))
        self.logprune.info(
            str(self.Pruning.manner) + " pruned " +
            str(self.prunedfilters) + " filters")
        self.logprune.info("Filters before pruning:")
        self.logprune.info(counts_before)
        self.logprune.info("Filters after pruning:")
        self.logprune.info(counts_after)
def clusterInLayer(self, layer):
    """Build filter clusters for conv layer `layer`.

    The number of clusters is the share of filters that should survive
    pruning: (100 - percentage)% of the layer's output channels.

    :param layer: index counted over conv layers only
    :return: result of makeClusters for the target layer (implicitly
             None when `layer` exceeds the number of conv layers)
    """
    seen = 0
    for module in self.Pruning.params.network.modules():
        if not isConvolutionLayer(module):
            continue
        if seen != layer:
            seen += 1
            continue
        cluster_count = int(
            ((100.0 - self.Pruning.percentage) * module.out_channels) / 100.0)
        return self.makeClusters(module, cluster_count)
def step(self, closure=None):
    """Perform one centripetal-SGD optimization step.

    For every filter cluster in each prunable conv layer, each filter is
    moved by the cluster-averaged gradient, its own weight decay, and a
    centripetal term pulling it towards the cluster mean — so clustered
    filters gradually converge and can later be merged.
    Layers whose dependency flag is False are skipped.

    :param closure: optional callable that re-evaluates the model and
        returns the loss (standard torch optimizer convention)
    :return: the closure's loss, or None
    """
    loss = None
    if closure is not None:
        loss = closure()
    layer = 0          # index over all conv layers (for dependencies)
    allowedlayer = 0   # index over prunable conv layers (for clusterlist)
    for m in self.Pruning.params.network.modules():
        if isConvolutionLayer(m):
            if (self.Pruning.dependencies[layer][2] == True):
                for cluster in self.clusterlist[allowedlayer]:
                    # No gradient yet (e.g. before any backward pass).
                    if m.weight.grad is None:
                        continue
                    clusterdimensionality = len(cluster)
                    # Per-cluster running sums of filter weights and
                    # gradients; one [in_channels, kH, kW] tensor each.
                    filtersum = torch.zeros([
                        m.weight.data.shape[1], m.weight.data.shape[2],
                        m.weight.data.shape[3]
                    ], device=self.Pruning.device)
                    gradientsum = torch.zeros([
                        m.weight.data.shape[1], m.weight.data.shape[2],
                        m.weight.data.shape[3]
                    ], device=self.Pruning.device)
                    for filter in cluster:
                        filtersum += m.weight[filter]
                        gradientsum += m.weight.grad[filter]
                    for filter in cluster:
                        deltafilter = torch.zeros(
                            [
                                m.weight.data.shape[1],
                                m.weight.data.shape[2],
                                m.weight.data.shape[3]
                            ],
                            device=self.Pruning.device)
                        # Shared descent direction: cluster-mean gradient.
                        deltafilter -= gradientsum / clusterdimensionality
                        # L2 weight decay on the individual filter.
                        deltafilter -= self.weight_decay * m.weight[filter]
                        # Centripetal pull towards the cluster mean weight.
                        deltafilter += self.centripetal_force * (
                            (filtersum / clusterdimensionality) -
                            m.weight[filter])
                        # In-place weight update outside autograd tracking.
                        with torch.no_grad():
                            m.weight[filter] = m.weight[filter].add(
                                self.lr * deltafilter)
                        del deltafilter
                    del clusterdimensionality, filtersum, gradientsum
                allowedlayer += 1
            layer += 1
    return loss
def updateNormValues(self, layer_index):
    """Recompute the minimum normalized filter-norm entry for one layer.

    Stores [min_value, min_index] for conv layer `layer_index` into
    self.values, where the per-filter statistic is the mean squared
    weight, normalized by the layer's L2 norm over those statistics.

    :param layer_index: index counted over conv layers only
    """
    count = 0
    for m in self.Pruning.params.network.modules():
        if isConvolutionLayer(m):
            if count == layer_index:
                with torch.no_grad():
                    weights = m.weight.data
                    # Mean of squared values per filter. BUG FIX: the
                    # original wrote `a / b * c * d`, dividing only by
                    # shape[1]; parenthesize so all three dimensions
                    # divide. The normalization below cancels any
                    # constant factor, so stored results are unchanged.
                    values_this_layer = weights.pow(2).sum(1).sum(1).sum(
                        1) / (weights.shape[1] * weights.shape[2] *
                              weights.shape[3])
                    # Normalization (important): scale-invariant ranking.
                    values_this_layer = values_this_layer / torch.sqrt(
                        torch.pow(values_this_layer, 2).sum())
                    min_value, min_ind = arg_nonzero_min(
                        list(values_this_layer))
                    self.values[layer_index] = [min_value, min_ind]
                break
            else:
                count += 1
def __call__(self):
    """Run norm-based filter pruning.

    Ranks filters in prunable layers by normalized mean-squared weight,
    then repeatedly prunes the globally smallest filter until the target
    amount (percentage of total filters) is reached. In 'soft' mode the
    hard pruning happens on the live network only to drive selection,
    and the recorded prunelist is soft-applied to a deep copy which then
    replaces the network.
    """
    with torch.no_grad():
        self.values = []
        if self.Pruning.manner == 'soft':
            # Soft mode prunes a copy so original filter count is kept.
            self.modelcopy = copy.deepcopy(self.Pruning.params.network)
            self.prunelist = []
        filtersperlayer = []
        filtersperlayerpruned = []
        # Calculate the initial norm values and
        # calculate total amount of filters and the amount to prune.
        count = 0
        for m in self.Pruning.params.network.modules():
            if isConvolutionLayer(m):
                filtersperlayer.append(m.out_channels)
                self.totalfilters += m.out_channels
                if self.Pruning.dependencies[count][2] == True:
                    self.allowedfilters += m.out_channels
                    weights = m.weight.data
                    # Mean squared weight per filter. BUG FIX: original
                    # precedence `a / b * c * d` divided by shape[1]
                    # only; the normalization below cancels constant
                    # factors, so results are unchanged by this fix.
                    values_this_layer = weights.pow(2).sum(1).sum(1).sum(
                        1) / (weights.shape[1] * weights.shape[2] *
                              weights.shape[3])
                    # Normalization (important).
                    values_this_layer = values_this_layer / torch.sqrt(
                        torch.pow(values_this_layer, 2).sum())
                    min_value, min_ind = arg_nonzero_min(
                        list(values_this_layer))
                    self.values.append([min_value, min_ind])
                count += 1
        self.values = np.array(self.values)
        self.pruneamount = int(
            0.01 * self.Pruning.percentage * self.totalfilters)
        logstring = 'There are ' + str(self.totalfilters) + ' filters in convolutional layers, allowed to prune in ' + str(self.allowedfilters)\
            + ', attempting to prune ' + str(self.pruneamount) + ' filters'
        self.logprune.info(logstring)
        # Prune as long as the conditions aren't met; the second clause
        # refuses to start when the target exceeds the allowed filters.
        while self.prunedfilters < self.pruneamount and not (
                self.pruneamount > self.allowedfilters):
            layer_index = self.findLayerIndex()
            filter_index = int(self.values[layer_index, 1])
            prunetuple = (layer_index, filter_index)
            prunelist = [prunetuple]
            if self.Pruning.manner == 'soft':
                self.prunelist.append(prunetuple)
            hardPruneFilters(self.Pruning, prunelist)
            self.prunedfilters += 1
            self.updateNormValues(layer_index)
        if self.Pruning.manner == 'soft':
            # BUG FIX: was hard-coded .to('cuda'), which crashes on
            # CPU-only runs; use the configured device instead, matching
            # device usage elsewhere in this file.
            self.modelcopy.to(self.Pruning.device)
            softPruneFilters(self.Pruning, self.modelcopy, self.prunelist)
            self.Pruning.params.network = self.modelcopy
        # Check the final amount of filters and log the summary.
        finalcount = 0
        for m in self.Pruning.params.network.modules():
            if isConvolutionLayer(m):
                filtersperlayerpruned.append(m.out_channels)
                finalcount += m.out_channels
        deleteGrads(self.Pruning)
        self.logprune.info(
            "The final amount of filters after pruning is " +
            str(finalcount))
        self.logprune.info(
            str(self.Pruning.manner) + " pruned " +
            str(self.prunedfilters) + " filters")
        self.logprune.info("Filters before pruning:")
        self.logprune.info(filtersperlayer)
        self.logprune.info("Filters after pruning:")
        self.logprune.info(filtersperlayerpruned)
def __call__(self):
    """Run cluster-based (centripetal SGD) pruning.

    Builds filter clusters per prunable layer, retrains the network with
    the custom C_SGD optimizer so clustered filters converge, monitors
    convergence via a chi statistic, then merges each cluster down to a
    single filter and logs before/after filter counts.
    """
    self.values = []
    logstring = ""
    filtersperlayer = []
    filtersperlayerpruned = []
    totalfilters = 0
    layer = 0
    # Count conv layers and their filters before pruning.
    for m in self.Pruning.params.network.modules():
        if isConvolutionLayer(m):
            filtersperlayer.append(m.out_channels)
            totalfilters += m.out_channels
            layer += 1
    # Generate clusters for every layer allowed by its dependency flag.
    clusterlist = []
    for number in range(layer):
        if (self.Pruning.dependencies[number][2] == True):
            logstring = "Making clusters for layer " + str(number)
            self.logprune.info(logstring)
            clusterlist.append(self.clusterInLayer(number))
    self.Pruning.params.network.train()
    self.Pruning.params.batch = 0
    self.Pruning.params.epoch = 0
    # Custom optimizer that adds a centripetal force pulling clustered
    # filters together (lr / centripetal_force are hard-coded here).
    self.Pruning.params.optimizer = C_SGD(
        self.Pruning.params.network.parameters(),
        clusterlist,
        self.Pruning,
        lr=0.03,
        weight_decay=self.Pruning.params.weight_decay,
        centripetal_force=0.003,
    )
    # Quartic burn-in ramp followed by milestone-based LR decay.
    burn_in = torch.optim.lr_scheduler.LambdaLR(
        self.Pruning.params.optimizer,
        lambda b: (b / self.Pruning.params.burnin)**4,
    )
    step = torch.optim.lr_scheduler.MultiStepLR(
        self.Pruning.params.optimizer,
        milestones=self.Pruning.params.milestones,
        gamma=self.Pruning.params.gamma,
    )
    self.Pruning.params.scheduler = ln.engine.SchedulerCompositor(
        (0, burn_in),
        (self.Pruning.params.burnin, step),
    )
    # TRAINING FROM HERE
    self.logprune.info('Start training')
    self.Pruning.params.network.train()
    # Gradient accumulation: one optimizer step per `batch_subdivisions`
    # mini-batches.
    batch_subdivisions = self.Pruning.params.batch_size // self.Pruning.params.mini_batch_size
    idx = 0
    while True:
        idx %= batch_subdivisions
        loader = self.training_dataloader
        # NOTE(review): `idx` is reused as the enumerate counter here,
        # continuing from the previous epoch's position — verify this is
        # intentional and not an off-by-one in the accumulation check.
        for idx, data in enumerate(loader, idx + 1):
            # Batch Start
            # Forward and backward on (mini-)batches
            # process_batch
            data, target = data
            data = data.to(self.Pruning.device)
            out = self.Pruning.params.network(data)
            # Scale loss so accumulated gradients average over subdivisions.
            loss = self.Pruning.params.loss(out, target) / batch_subdivisions
            loss.backward()
            if idx % batch_subdivisions != 0:
                continue
            # Optimizer step
            self.batch += 1  # Should only be called after train, but this is easier to use self.batch in function
            logstring = "Cluster retraining batch: " + str(self.batch)
            self.logprune.info(logstring)
            #self.train_batch()
            self.Pruning.params.optimizer.step()
            self.Pruning.params.optimizer.zero_grad()
            self.Pruning.params.scheduler.step(self.batch, epoch=self.batch)
            # Batch End
        # Epoch End
        self.epoch += 1
        logstring = "Cluster retraining epoch: " + str(self.epoch)
        self.logprune.info(logstring)
        # Test to see if filters grew to each other by calculating chi:
        # the squared distance of each clustered filter from its cluster
        # mean, sampled from the first cluster of the first layer only.
        chi = 0
        layer = 0
        allowedlayer = 0
        for m in self.Pruning.params.network.modules():
            if isConvolutionLayer(m):
                if (self.Pruning.dependencies[layer][2] == True):
                    clustercount = -1
                    for cluster in clusterlist[allowedlayer]:
                        clustercount += 1
                        clusterdimensionality = len(cluster)
                        filtersum = torch.zeros([
                            m.weight.data.shape[1], m.weight.data.shape[2],
                            m.weight.data.shape[3]
                        ], device=self.Pruning.device)
                        for filter in cluster:
                            filtersum += m.weight[filter]
                        filtersum = filtersum / clusterdimensionality
                        for filter in cluster:
                            # NOTE(review): `a / b * c * d` multiplies by
                            # shape[1] and shape[2] instead of dividing —
                            # likely missing parentheses; affects only
                            # this logged diagnostic.
                            chi += (
                                m.weight[filter] - filtersum
                            ).pow(2).sum(1).sum(1).sum(
                                0) / filtersum.shape[0] * filtersum.shape[
                                    1] * filtersum.shape[2]
                        # Only the first cluster of the first visited
                        # layer is sampled for chi.
                        break
                # NOTE(review): `layer`/`allowedlayer` are never
                # incremented in this diagnostic loop, so dependencies[0]
                # and clusterlist[0] are always consulted — confirm the
                # first conv layer is always prunable here.
                break
        logstring = "Log10(chi) of epoch " + str(
            self.epoch) + " is: " + str(math.log10(chi))
        self.logprune.info(logstring)
        # Fixed retraining budget of 50 epochs.
        if self.epoch == 50:
            break
    # UNTIL HERE
    # Prune all filters in each cluster except for the first indexed filter
    layer = 0
    allowedlayer = 0
    for m in self.Pruning.params.network.modules():
        if isConvolutionLayer(m):
            if (self.Pruning.dependencies[layer][2] == True):
                clustercount = -1
                for cluster in clusterlist[allowedlayer]:
                    clustercount += 1
                    # Singleton clusters have nothing to merge.
                    if len(cluster) < 2:
                        continue
                    combineFilters(layer, cluster, self.Pruning)
                    # Adjust integers in other clusters to account for
                    # shifts in the tensor because of deleted filters:
                    # only clusters after the current one are touched.
                    otherclustercount = 0
                    for othercluster in clusterlist[allowedlayer]:
                        if otherclustercount > clustercount:
                            for i in cluster:
                                itemcount = 0
                                for item in othercluster:
                                    if item >= i:
                                        othercluster[itemcount] -= 1
                                    itemcount += 1
                        otherclustercount += 1
                allowedlayer += 1
            layer += 1
    # Check the final amount of filters.
    finalcount = 0
    for m in self.Pruning.params.network.modules():
        if isConvolutionLayer(m):
            filtersperlayerpruned.append(m.out_channels)
            finalcount += m.out_channels
    deleteGrads(self.Pruning)
    logstring = "The final amount of filters after pruning is " + str(
        finalcount)
    self.logprune.info(logstring)
    self.prunedfilters = totalfilters - finalcount
    logstring = str(self.Pruning.manner) + " pruned " + str(
        self.prunedfilters) + " filters"
    self.logprune.info(logstring)
    self.logprune.info("Filters before pruning:")
    self.logprune.info(filtersperlayer)
    self.logprune.info("Filters after pruning:")
    self.logprune.info(filtersperlayerpruned)