def decompose(self):
    """Run the conv decomposition pipeline, fine-tune, then deploy to the
    target device over RPC to compare measured runtime against predictions.

    Side effects: mutates self.model in place and sets self.decomp_runtime_ms.
    """
    print('\n{}Bayesian Begin'.format('\033[33m'))   # yellow ANSI banner
    print('↓↓↓↓↓↓↓↓↓↓↓↓↓↓{}\n'.format('\033[0m'))    # reset color
    self.conv_decomposition()
    print('-------------> Decomposition Finish')
    print('Final Fine_tune ...')
    # NOTE(review): 30 presumably means epochs — confirm fine_tune's signature.
    fine_tune(self.model, 30)
    fine_tune_test(self.model, self.testloader, True)
    print('The Decomposed Model ...')
    print(self.model)
    mac, weight = self.get_model_mac_weight(self.model)
    #deploy to target
    save_model_name = export_onnx_model(self.model)
    # NOTE(review): deploy_by_rpc appears to return seconds (scaled to ms
    # below) — confirm against its implementation.
    decomp_runtime = deploy_by_rpc(save_model_name)
    self.decomp_runtime_ms = decomp_runtime * 1000
    os.remove(save_model_name)   # clean up the temporary ONNX export
    tmp_decomp_predict_runtime, tmp_decomp_layer_runtime = self.get_model_predict_runtime(self.model)
    print('Origin_MAC: {}, Origin_Weight: {}, Origin_Runtime: {}, Origin_Predict_Runtime: {}'.format(self.origin_mac, self.origin_weight, self.real_model_runtime, self.origin_model_runtime))
    print('Decomp_MAC: {}, Decomp_Weight: {}, Decomp_Runtime: {}, Decomp_Predict_Runtime: {}'.format(mac, weight, self.decomp_runtime_ms, tmp_decomp_predict_runtime))
    print('Speedup : {}'.format(float(self.real_model_runtime/self.decomp_runtime_ms)))
def decompose(self):
    """Run conv + fc decomposition, fine-tune, and persist the result.

    Unlike the RPC variant, on-device deployment is commented out; only the
    predicted runtime is reported. Saves a checkpoint dict keyed by the
    constraint value under result/all_decomposed/.
    """
    print('\n{}Bayesian Begin'.format('\033[33m'))   # yellow ANSI banner
    print('↓↓↓↓↓↓↓↓↓↓↓↓↓↓{}\n'.format('\033[0m'))    # reset color
    self.conv_decomposition()
    self.fc_decomposition()
    print('-------------> Decomposition Finish')
    print('Final Fine_tune ...')
    fine_tune(self.model, 30)
    acc = fine_tune_test(self.model, self.testloader, True)
    print('The Decomposed Model ...')
    print(self.model)
    mac, weight = self.get_model_mac_weight(self.model)
    '''
    #deploy to target
    save_model_name = export_onnx_model(self.model)
    decomp_runtime = deploy_by_rpc(save_model_name)
    self.decomp_runtime_ms = decomp_runtime * 1000
    os.remove(save_model_name)
    '''
    tmp_decomp_predict_runtime, tmp_decomp_layer_runtime = self.get_model_predict_runtime(self.model)
    print('Origin_MAC: {}, Origin_Weight: {}, Origin_Runtime: {}, Origin_Predict_Runtime: {}'.format(self.origin_mac, self.origin_weight, self.real_model_runtime, self.origin_model_runtime))
    #print('Decomp_MAC: {}, Decomp_Weight: {}, Decomp_Runtime: {}, Decomp_Predict_Runtime: {}'.format(mac, weight, self.decomp_runtime_ms, tmp_decomp_predict_runtime))
    print('Decomp_MAC: {}, Decomp_Weight: {}, Decomp_Predict_Runtime: {}'.format(mac, weight, tmp_decomp_predict_runtime))
    print('ACC: {}'.format(acc))
    #print('Speedup : {}'.format(float(self.real_model_runtime/self.decomp_runtime_ms)))
    # Checkpoint the decomposed model along with its search statistics.
    state = {
        'model': self.model,
        'bayesian_iter': self.bayesian_iter,
        'acc': acc,
        #'real_runtime':self.decomp_runtime_ms,
        #'predict_runtime':tmp_decomp_predict_ms,
        'mac': mac,
        'weight': weight
    }
    torch.save(state, 'result/all_decomposed/' + str(self.constrain) + '_alexnet_model')
def fc_decomposition(self):
    """Decompose each eligible fully-connected classifier layer once.

    Single-shot variant: runs one Bayesian rank search per Linear layer
    (target rate fixed at 0.5), replaces the layer with its decomposed form,
    and fine-tunes after each replacement. Mutates self.model in place.
    """
    # Reference model decomposed with VBMF ranks; only its per-layer
    # predicted runtimes are loaded here (and not used further below).
    VBMF_model = torch.load('checkpoint/VBMF_alexnet_model')
    _, tmp_VBMF_layer_runtime = self.get_model_predict_runtime(VBMF_model)
    #remain_budget = 0.0
    N_classifier = len(self.model.classifier._modules.keys())
    for i, key in enumerate(self.model.classifier._modules.keys()):
        # Leave the last two classifier entries untouched.
        if i >= N_classifier - 2:
            break
        if isinstance(self.model.classifier._modules[key], torch.nn.modules.linear.Linear):
            fc_layer_to_decompose = self.model.classifier._modules[key]
            #print('Travis fc_layer_to_decompose rank1:{}, rank2:{}'.format(fc_layer_to_decompose.in_features, fc_layer_to_decompose.out_features))
            print('\n{}Layer Info{}'.format('\033[31m', '\033[0m'))
            print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
            print('Rank1:{}, Rank2:{}'.format(fc_layer_to_decompose.in_features, fc_layer_to_decompose.out_features))
            print(self.estimate(fc_layer_to_decompose, key))
            self.decomposed_layer_info['key'] = key
            self.fc_target_rate = 0.5
            # Search bound: rank at which the factorized layer holds as many
            # weights as the original (in*out // (in+out)).
            rank, b_iter = self.fc_bayesian(fc_layer_to_decompose.in_features * fc_layer_to_decompose.out_features // (fc_layer_to_decompose.in_features + fc_layer_to_decompose.out_features))
            self.bayesian_iter['fc_' + key] = b_iter
            #rank = self.fc_bayesian(fc_layer_to_decompose.in_features*fc_layer_to_decompose.out_features//(fc_layer_to_decompose.in_features+fc_layer_to_decompose.out_features))
            # Predicted runtimes of the two Linear layers replacing this one.
            # NOTE(review): tmp_runtime_ms is computed but never used here.
            tmp_ms_1 = self.estimate_with_config([fc_layer_to_decompose.in_features, rank[0]])
            tmp_ms_2 = self.estimate_with_config([rank[0], fc_layer_to_decompose.out_features])
            tmp_runtime_ms = tmp_ms_1 + tmp_ms_2
            #assert (tmp_runtime_ms - self.VBMF_layer_runtime['fc_'+key]) > 0
            decompose = tucker_decomposition_fc_layer_without_rank(self.model.classifier._modules[key], rank)
            self.model.classifier._modules[key] = decompose
            fine_tune(self.model, 10)
            fine_tune_test(self.model, self.testloader, True)
def conv_decomposition(self):
    """Tucker-decompose the single hard-coded conv layer features['6'].

    Runs a Bayesian rank search, estimates the runtime of the resulting
    three-layer stack, replaces the layer, and fine-tunes. Mutates
    self.model in place.
    """
    conv_layer_to_decompose = self.model.features._modules['6']
    key = '6'
    # Record layer geometry for the rank-search objective.
    self.decomposed_layer_info['key'] = key
    self.decomposed_layer_info['image_size'] = self.model_image_size[key][0]
    self.decomposed_layer_info['kernel_size'] = conv_layer_to_decompose.kernel_size[0]
    self.decomposed_layer_info['stride'] = conv_layer_to_decompose.stride[0]
    self.decomposed_layer_info['padding'] = conv_layer_to_decompose.padding[0]
    ranks = self.conv_bayesian([conv_layer_to_decompose.in_channels, conv_layer_to_decompose.out_channels])
    # Predicted runtimes of the Tucker-2 stack: 1x1 reduce, KxK core, 1x1 expand.
    # NOTE(review): tmp_runtime_ms is computed but never used here.
    tmp_ms_1 = self.estimate_with_config([self.decomposed_layer_info['image_size'], \
                                          conv_layer_to_decompose.in_channels, ranks[0], \
                                          1, \
                                          self.decomposed_layer_info['stride'], \
                                          self.decomposed_layer_info['padding']])
    tmp_ms_2 = self.estimate_with_config([self.decomposed_layer_info['image_size'], \
                                          ranks[0], ranks[1], \
                                          self.decomposed_layer_info['kernel_size'], \
                                          self.decomposed_layer_info['stride'], \
                                          self.decomposed_layer_info['padding']])
    tmp_ms_3 = self.estimate_with_config([self.decomposed_layer_info['image_size'], \
                                          ranks[1], conv_layer_to_decompose.out_channels, \
                                          1, \
                                          self.decomposed_layer_info['stride'], \
                                          self.decomposed_layer_info['padding']])
    tmp_runtime_ms = tmp_ms_1 + tmp_ms_2 + tmp_ms_3
    print('{}Fine Tune{}'.format('\033[31m', '\033[0m'))
    print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
    # Swap rank order before decomposition — presumably to match the order
    # expected by the tucker helper; TODO confirm.
    ranks[0], ranks[1] = ranks[1], ranks[0]
    decompose = tucker_decomposition_conv_layer_without_rank(self.model.features._modules[key], ranks)
    self.model.features._modules[key] = decompose
    fine_tune(self.model, 10)
    fine_tune_test(self.model, self.testloader, True)
def prune(self):
    """NetAdapt-style iterative filter pruning toward a runtime budget.

    Each iteration: for every conv layer, compute how many filters must be
    removed to save at least `self.initialization` ms (per the perf model),
    prune a copy of the model accordingly, fine-tune it briefly, and keep the
    candidate with the best accuracy. Repeats until the predicted runtime
    drops below the module-level `runtime_budget`, then performs a final
    fine-tune, measures real runtime over RPC, and saves a checkpoint.

    Side effects: mutates self.model, self.prunner, self.layer_index,
    self.initialization, and writes to result/prunned_model/.
    """
    print('\n{}NetAdapt Begin{}'.format(bc.yellow, bc.end))
    print('{}↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓{}'.format(bc.yellow, bc.end))
    print("\n{}Runtime_budget is {}{}".format(bc.green, runtime_budget, bc.end))
    #Get the accuracy before prunning
    self.test()
    self.model.train()
    #Make sure all the layers are trainable
    for param in self.model.features.parameters():
        param.requires_grad = True
    # Map each feature layer key -> [input image size, output image size],
    # propagated through conv and maxpool layers.
    model_image_size = {}
    if (dataset == 'cifar'):
        in_image_size = 32
    # NOTE(review): if dataset != 'cifar', in_image_size is never bound and
    # the loop below raises NameError — confirm intended supported datasets.
    for i, key in enumerate(self.model.features._modules.keys()):
        if isinstance(self.model.features._modules[key], torch.nn.modules.conv.Conv2d):
            conv_layer = self.model.features._modules[key]
            after_image_size = ((in_image_size - conv_layer.kernel_size[0] + 2 * conv_layer.padding[0]) // conv_layer.stride[0]) + 1
            model_image_size[key] = [in_image_size, after_image_size]
            in_image_size = after_image_size
        elif isinstance(self.model.features._modules[key], torch.nn.modules.MaxPool2d):
            maxpool_layer = self.model.features._modules[key]
            after_image_size = ((in_image_size - maxpool_layer.kernel_size) // maxpool_layer.stride) + 1
            model_image_size[key] = [in_image_size, after_image_size]
            in_image_size = after_image_size
    print(model_image_size)
    #Get the index of each layer
    self.layer_index = []
    for i, key in enumerate(self.model.features._modules.keys()):
        if isinstance(self.model.features._modules[key], torch.nn.modules.conv.Conv2d):
            self.layer_index.append(key)
    #Get the runtime before prunning
    self.origin_predict_layer_runtime = {}
    origin_predict_runtime = 0.0
    for i, key in enumerate(self.model.features._modules.keys()):
        if isinstance(self.model.features._modules[key], torch.nn.modules.conv.Conv2d):
            conv_layer = self.model.features._modules[key]
            tmp_perf_runtime = self.perf_model.conv_predict(model_image_size[key][0], conv_layer.in_channels, conv_layer.out_channels, \
                conv_layer.kernel_size[0], conv_layer.stride[0], conv_layer.padding[0])
            self.origin_predict_layer_runtime['conv_' + key] = tmp_perf_runtime
            origin_predict_runtime += tmp_perf_runtime
    for i, key in enumerate(self.model.classifier._modules.keys()):
        if isinstance(self.model.classifier._modules[key], torch.nn.modules.linear.Linear):
            fc_layer = self.model.classifier._modules[key]
            tmp_perf_runtime = self.perf_model.fc_predict(fc_layer.in_features, fc_layer.out_features)
            self.origin_predict_layer_runtime['fc_' + key] = tmp_perf_runtime
            origin_predict_runtime += tmp_perf_runtime
    print('{}Predict_Origin_predict_runtime{}: {}ms'.format(bc.green, bc.end, origin_predict_runtime))
    print('{}Predict_Origin_predict_layer_runtime{}: {}\n'.format(bc.green, bc.end, self.origin_predict_layer_runtime))
    self.decomposed_predict_layer_runtime = copy.deepcopy(self.origin_predict_layer_runtime)
    origin_model = copy.deepcopy(self.model)
    #model_runtime = origin_runtime * 1000
    perf_model_runtime = origin_predict_runtime
    iteration_count = 1
    while (perf_model_runtime > runtime_budget):
        print('{}Iteration {}{}'.format(bc.red, iteration_count, bc.end))
        print('{}--------------------------------------------{}'.format(bc.red, bc.end))
        # Hard stop to avoid a runaway search.
        if (iteration_count > 100):
            import sys
            print('iteration > 100')
            sys.exit(1)
        number_of_filters = self.total_num_filters()
        print("Ranking filters.. ")
        # Global filter ranking (least-important first) across all conv layers.
        prune_targets = self.get_rank_to_prune(number_of_filters)
        print('{}Number_of_filters{}: {}'.format(bc.green, bc.end, number_of_filters))
        print('{}Initialization{}: {}, {}Decay{}: {}'.format(bc.green, bc.end, self.initialization, bc.green, bc.end, self.decay_rate))
        #print('Travis model: ', self.model)
        #print('Travis prune_targets: ', prune_targets)
        print('')
        layer_record = {}
        model_record = {}  # NOTE(review): assigned but never used
        #tmp_model = copy.deepcopy(self.model)
        for i, key in enumerate(self.model.features._modules.keys()):
            if isinstance(self.model.features._modules[key], torch.nn.modules.conv.Conv2d):
                prune_conv_layer = self.model.features._modules[key]
                # The following conv layer (if any) shrinks too when this
                # layer's output channels are pruned.
                if (self.layer_index.index(key) != len(self.layer_index) - 1):
                    next_conv_layer = self.model.features._modules[self.layer_index[self.layer_index.index(key) + 1]]
                else:
                    next_conv_layer = None
                print('{}Convolution Layer {}{}'.format(bc.light_blue, key, bc.end))
                print('Curr Layer: {}, Next Layer: {}'.format(prune_conv_layer, next_conv_layer))
                tmp_model = copy.deepcopy(self.model)
                #print('Travis tmp_model: {}'.format(tmp_model))
                # Filters from the global ranking that belong to this layer.
                # NOTE(review): inner `i` shadows the outer enumerate index.
                layer_all_filter = []
                for i in prune_targets:
                    if (str(i[0]) == key):
                        layer_all_filter.append(i)
                tmp_perf_runtime = self.perf_model.conv_predict(model_image_size[key][0], prune_conv_layer.in_channels, prune_conv_layer.out_channels, \
                    prune_conv_layer.kernel_size[0], prune_conv_layer.stride[0], prune_conv_layer.padding[0])
                # Layer already cheaper than the required saving: skip it.
                if (tmp_perf_runtime <= self.initialization):
                    print('{}tmp_perf_runtime <= self.initialization{}'.format(bc.purple, bc.end))
                    print('tmp_perf_runtime: {}, self.initialzation: {}'.format(tmp_perf_runtime, self.initialization))
                    print('image_size: {}, in_channels: {}, out_channels: {}, kernel_size: {}, stride: {}, padding: {}'.format(model_image_size[key][0],\
                        prune_conv_layer.in_channels, prune_conv_layer.out_channels, prune_conv_layer.kernel_size[0], prune_conv_layer.stride[0],\
                        prune_conv_layer.padding[0]))
                    layer_record[key] = [0, None, None]
                    print('')
                    continue
                # Baseline runtime of this layer (+ the next layer it feeds).
                if (next_conv_layer != None):
                    prune_layer_runtime_1 = self.perf_model.conv_predict(model_image_size[key][0], prune_conv_layer.in_channels, prune_conv_layer.out_channels, \
                        prune_conv_layer.kernel_size[0], prune_conv_layer.stride[0], prune_conv_layer.padding[0])
                    prune_layer_runtime_2 = self.perf_model.conv_predict(model_image_size[self.layer_index[self.layer_index.index(key)+1]][0], next_conv_layer.in_channels, next_conv_layer.out_channels, \
                        next_conv_layer.kernel_size[0], next_conv_layer.stride[0], next_conv_layer.padding[0])
                    prune_layer_runtime = prune_layer_runtime_1 + prune_layer_runtime_2
                else:
                    prune_layer_runtime = self.perf_model.conv_predict(model_image_size[key][0], prune_conv_layer.in_channels, prune_conv_layer.out_channels, \
                        prune_conv_layer.kernel_size[0], prune_conv_layer.stride[0], prune_conv_layer.padding[0])
                # Shrink out_channels until the predicted saving reaches the
                # per-iteration target `self.initialization`.
                for tmp_out_channel in range(prune_conv_layer.out_channels - 1, 1, -1):
                    if (next_conv_layer != None):
                        tmp_runtime_1 = self.perf_model.conv_predict(model_image_size[key][0], prune_conv_layer.in_channels, tmp_out_channel, \
                            prune_conv_layer.kernel_size[0], prune_conv_layer.stride[0], prune_conv_layer.padding[0])
                        tmp_runtime_2 = self.perf_model.conv_predict(model_image_size[self.layer_index[self.layer_index.index(key)+1]][0], tmp_out_channel, next_conv_layer.out_channels, \
                            next_conv_layer.kernel_size[0], next_conv_layer.stride[0], next_conv_layer.padding[0])
                        tmp_runtime = tmp_runtime_1 + tmp_runtime_2
                    else:
                        tmp_runtime_1 = self.perf_model.conv_predict(model_image_size[key][0], prune_conv_layer.in_channels, tmp_out_channel, \
                            prune_conv_layer.kernel_size[0], prune_conv_layer.stride[0], prune_conv_layer.padding[0])
                        tmp_runtime = tmp_runtime_1
                    #print('Travis tmp_out_channel: {}, tmp_runtime: {}, prune_layer_runtime: {}'.format(tmp_out_channel, tmp_runtime, prune_layer_runtime))
                    if ((prune_layer_runtime - tmp_runtime) >= self.initialization):
                        print('Travis prune_layer_runtime: {}, tmp_runtime: {}, prune_layer_runtime-tmp_runtime: {}'.format(prune_layer_runtime, \
                            tmp_runtime, prune_layer_runtime-tmp_runtime))
                        num_filter_to_prune = prune_conv_layer.out_channels - tmp_out_channel
                        break
                # NOTE(review): if the loop above never breaks,
                # num_filter_to_prune is unbound (NameError) or stale from a
                # previous layer — confirm the saving target is always reachable.
                layer_prune_target = layer_all_filter[0:num_filter_to_prune]
                #for i in layer_prune_target:
                #    print(i)
                prune_plan = self.prunner.get_layer_prunning_plan(layer_prune_target)
                #print(layer_prune_target)
                #print(prune_plan)
                layers_prunned = {}
                for layer_index, filter_index in prune_plan:
                    if layer_index not in layers_prunned:
                        layers_prunned[layer_index] = 0
                    layers_prunned[layer_index] = layers_prunned[layer_index] + 1
                print("Layers that will be prunned", layers_prunned)
                print("Prunning filters.. ")
                cpu_model = tmp_model.cpu()
                for layer_index, filter_index in prune_plan:
                    # NOTE(review): the returned model is assigned to `model`
                    # but never read; code relies on cpu_model being mutated
                    # in place — confirm prune_vgg16_conv_layer's semantics.
                    model = prune_vgg16_conv_layer(cpu_model, layer_index, filter_index)
                tmp_model = cpu_model.cuda()
                tmp_model, tmp_acc = fine_tune(tmp_model, True)
                layer_record[key] = [tmp_acc, prune_plan, tmp_model]
                print('Acc after Fine_Tune {}'.format(tmp_acc))
                print('')
        #print('Travis Model', self.model)
        # Keep the per-layer candidate with the highest post-fine-tune accuracy.
        acc_max = [0, -1]
        for i, key in enumerate(layer_record.keys()):
            if layer_record[key][0] > acc_max[1]:
                acc_max = [key, layer_record[key][0]]
        print('{}Pick max acc..{} key: {}, acc: {}'.format(bc.blue, bc.end, acc_max[0], acc_max[1]))
        ## Travis Test
        self.model = layer_record[acc_max[0]][2]
        self.prunner.modify_model(layer_record[acc_max[0]][2])
        print(self.model)
        self.test()
        # Re-estimate total predicted runtime of the pruned model.
        tmp_latency = 0.0
        tmp_layer_latency = {}
        for i, key in enumerate(self.model.features._modules.keys()):
            if isinstance(self.model.features._modules[key], torch.nn.modules.conv.Conv2d):
                tmp_conv_layer = self.model.features._modules[key]
                tmp_perf_runtime = self.perf_model.conv_predict(model_image_size[key][0], tmp_conv_layer.in_channels, tmp_conv_layer.out_channels, \
                    tmp_conv_layer.kernel_size[0], tmp_conv_layer.stride[0], tmp_conv_layer.padding[0])
                tmp_layer_latency['conv_' + key] = tmp_perf_runtime
                tmp_latency += tmp_perf_runtime
        for i, key in enumerate(self.model.classifier._modules.keys()):
            if isinstance(self.model.classifier._modules[key], torch.nn.modules.linear.Linear):
                fc_layer = self.model.classifier._modules[key]
                tmp_perf_runtime = self.perf_model.fc_predict(fc_layer.in_features, fc_layer.out_features)
                tmp_layer_latency['fc_' + key] = tmp_perf_runtime
                tmp_latency += tmp_perf_runtime
        print('{}Predict Runtime after iteration {}: {}ms, reduction: {}ms{}'.format('\033[32m', iteration_count, tmp_latency, perf_model_runtime - tmp_latency, '\033[0m'))
        print('Runtime for each layer: {}'.format(tmp_layer_latency))
        perf_model_runtime = tmp_latency
        #torch.save(self.model, "result/prunned_model/model_" + str(runtime_budget) + '_' + str(iteration_count) + "_prunned")
        ## Get runtime after one iteration
        tmp_save_model_name = export_onnx_model(self.model)
        tmp_model_runtime = deploy_by_rpc(tmp_save_model_name) * 1000
        print('{}Real Runtime after iteration {}: {}ms{}'.format('\033[32m', iteration_count, tmp_model_runtime, '\033[0m'))
        os.remove(tmp_save_model_name)
        iteration_count += 1
        # Decay the per-iteration saving target.
        self.initialization *= self.decay_rate
        print('')
    print("{}Finished. Going to fine tune the model a bit more{}".format('\033[33m', '\033[0m'))
    optimizer = optim.SGD(self.model.parameters(), lr=0.0001, momentum=0.9)
    self.train(optimizer, epoches=15)
    print('Travis self.model', self.model)
    final_acc = fine_tune_test(self.model, self.test_data_loader, True)
    ## Get runtime after one iteration
    tmp_save_model_name = export_onnx_model(self.model)
    tmp_model_runtime = deploy_by_rpc(tmp_save_model_name) * 1000
    print('Runtime after pruning: {}ms'.format(tmp_model_runtime))
    os.remove(tmp_save_model_name)
    model_runtime_after_pruning = tmp_model_runtime
    # Persist the pruned model and its final statistics.
    state = {
        'net': self.model,
        'acc': final_acc,
        'iteration_count': iteration_count,
        'model_runtime': model_runtime_after_pruning,
    }
    torch.save(state, "result/prunned_model/model_" + str(runtime_budget) + "_prunned")
def fc_decomposition(self):
    """Decompose fully-connected classifier layers under a runtime budget.

    For every Linear layer in ``self.model.classifier`` except the last two
    entries, search a decomposition rank (Bayesian optimization, or the
    pre-computed VBMF rank when this layer's search budget equals the VBMF
    runtime), accept it once the predicted runtime fits the layer's budget
    plus any budget carried over from earlier layers, then replace the layer
    with its Tucker-decomposed form and fine-tune the model.

    Side effects: mutates ``self.model``, ``self.fc_target_rate``,
    ``self.remain_budget`` and ``self.bayesian_iter``.
    """
    # Reference model decomposed with VBMF-estimated ranks; its per-layer
    # predicted runtimes serve as the search baseline.
    VBMF_model = torch.load('checkpoint/VBMF_alexnet_model')
    _, tmp_VBMF_layer_runtime = self.get_model_predict_runtime(VBMF_model)
    #remain_budget = 0.0
    N_classifier = len(self.model.classifier._modules.keys())
    for i, key in enumerate(self.model.classifier._modules.keys()):
        # Leave the last two classifier entries untouched.
        if i >= N_classifier - 2:
            break
        if isinstance(self.model.classifier._modules[key], torch.nn.modules.linear.Linear):
            fc_layer_to_decompose = self.model.classifier._modules[key]
            #print('Travis fc_layer_to_decompose rank1:{}, rank2:{}'.format(fc_layer_to_decompose.in_features, fc_layer_to_decompose.out_features))
            print('\n{}Layer Info{}'.format('\033[31m', '\033[0m'))
            print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
            print('Rank1:{}, Rank2:{}'.format(fc_layer_to_decompose.in_features, fc_layer_to_decompose.out_features))
            print(self.estimate(fc_layer_to_decompose, key))
            self.decomposed_layer_info['key'] = key
            if (self.constrain == 0):
                # BUG FIX: original read `sels.fc_target_rate = 0.5`, which
                # raised NameError whenever constrain == 0.
                self.fc_target_rate = 0.5
            else:
                self.fc_target_rate = 0
            beta = 0.5                  # penalty step added to the target rate on a failed search
            last_search_time = 1000000  # best (lowest) predicted runtime seen so far
            iteration_count = 0
            while (True):
                '''
                # prevent TVM from disconnecting
                save_model_name = export_onnx_model(self.model)
                _ = deploy_by_rpc(save_model_name)
                os.remove(save_model_name)
                '''
                print('{}Iteration{}'.format('\033[31m', '\033[0m'))
                print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
                print('Iteration {}{}{} Target_Rate: {}{}{}'.format('\033[32m', iteration_count, '\033[0m', '\033[32m', self.fc_target_rate, '\033[0m'))
                iteration_count += 1
                if (self.search_runtime['fc_' + key] == tmp_VBMF_layer_runtime['fc_' + key]):
                    # No headroom beyond VBMF: reuse the VBMF rank directly.
                    rank = [self.VBMF_layer_rank['fc_' + key][0]]
                    b_iter = 0
                else:
                    # Search bound: rank at which the factorized layer holds
                    # as many weights as the original (in*out // (in+out)).
                    rank, b_iter = self.fc_bayesian(fc_layer_to_decompose.in_features * fc_layer_to_decompose.out_features // (fc_layer_to_decompose.in_features + fc_layer_to_decompose.out_features))
                self.bayesian_iter['fc_' + key] = b_iter
                #rank = self.fc_bayesian(fc_layer_to_decompose.in_features*fc_layer_to_decompose.out_features//(fc_layer_to_decompose.in_features+fc_layer_to_decompose.out_features))
                # Predicted runtime of the two Linear layers replacing this one.
                tmp_ms_1 = self.estimate_with_config([fc_layer_to_decompose.in_features, rank[0]])
                tmp_ms_2 = self.estimate_with_config([rank[0], fc_layer_to_decompose.out_features])
                tmp_runtime_ms = tmp_ms_1 + tmp_ms_2
                #assert (tmp_runtime_ms - self.VBMF_layer_runtime['fc_'+key]) > 0
                if (self.constrain == 0):
                    # Unconstrained: accept the first search result.
                    break
                else:
                    if ((tmp_runtime_ms) <= (self.search_runtime['fc_' + key] + self.remain_budget)):
                        print('Travis Constrain: {}, Search_runtime: {}, Layer_budget: {}, Remain_budget: {}'.format(self.constrain, \
                            tmp_runtime_ms, \
                            self.search_runtime['fc_' + key], self.remain_budget))
                        print('Travis Layer_budget + Remain_budget: {}'.format(self.search_runtime['fc_' + key] + self.remain_budget))
                        # Carry unused budget over to the next layer.
                        self.remain_budget = (self.search_runtime['fc_' + key] + self.remain_budget) - (tmp_runtime_ms)
                        assert self.remain_budget >= 0
                        print('Updated Remain budget: {}'.format(self.remain_budget))
                        break
                    else:
                        # Over budget: raise the runtime pressure and retry.
                        print('Update the objective function ...')
                        if (last_search_time < tmp_runtime_ms):
                            beta += 0.25   # search got worse — increase the penalty step
                        else:
                            last_search_time = tmp_runtime_ms
                        if (iteration_count >= 10):
                            beta += 0.25
                        self.fc_target_rate += beta
                        print('Travis Constrain: {}, Search_runtime: {}, Layer_budget: {}, Remain_budget: {}'.format(self.constrain, \
                            tmp_runtime_ms, \
                            self.search_runtime['fc_' + key], self.remain_budget))
                        print('Travis Layer_budget + Remain_budget: {}'.format(self.search_runtime['fc_' + key] + self.remain_budget))
            decompose = tucker_decomposition_fc_layer_without_rank(self.model.classifier._modules[key], rank)
            self.model.classifier._modules[key] = decompose
            fine_tune(self.model, 10)
            fine_tune_test(self.model, self.testloader, True)
def conv_decomposition(self):
    """Tucker-decompose conv feature layers under a runtime budget.

    For every Conv2d layer in ``self.model.features`` except the first,
    search decomposition ranks (Bayesian optimization, or pre-computed VBMF
    ranks when this layer's search budget equals the VBMF runtime), accept
    them once the predicted runtime of the three-layer stack fits the layer
    budget plus carried-over budget, then replace the layer and fine-tune.

    Side effects: mutates ``self.model``, ``self.conv_target_rate``,
    ``self.remain_budget`` and ``self.bayesian_iter``.
    """
    # Reference model decomposed with VBMF-estimated ranks; its per-layer
    # predicted runtimes serve as the search baseline.
    VBMF_model = torch.load('checkpoint/VBMF_alexnet_model')
    _, tmp_VBMF_layer_runtime = self.get_model_predict_runtime(VBMF_model)
    #remain_budget = 0.0
    for i, key in enumerate(self.model.features._modules.keys()):
        # The first feature layer is kept undecomposed.
        if (i == 0):
            continue
        if isinstance(self.model.features._modules[key], torch.nn.modules.conv.Conv2d):
            conv_layer_to_decompose = self.model.features._modules[key]
            #print('\nTravis conv_layer_to_decompose rank1:{}, rank2:{}'.format(conv_layer_to_decompose.in_channels, conv_layer_to_decompose.out_channels))
            print('\n{}Layer Info{}'.format('\033[31m', '\033[0m'))
            print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
            print('Rank1:{}, Rank2:{}'.format(conv_layer_to_decompose.in_channels, conv_layer_to_decompose.out_channels))
            #print(self.estimate(conv_layer_to_decompose, key))
            # Record layer geometry for the rank-search objective.
            self.decomposed_layer_info['key'] = key
            self.decomposed_layer_info['image_size'] = self.model_image_size[key][0]
            self.decomposed_layer_info['kernel_size'] = conv_layer_to_decompose.kernel_size[0]
            self.decomposed_layer_info['stride'] = conv_layer_to_decompose.stride[0]
            self.decomposed_layer_info['padding'] = conv_layer_to_decompose.padding[0]
            if (self.constrain == 0):
                self.conv_target_rate = 0.5
            else:
                self.conv_target_rate = 0.0
            beta = 1                    # penalty step added to the target rate on a failed search
            last_search_time = 1000000  # best (lowest) predicted runtime seen so far
            iteration_count = 0
            while (True):
                '''
                # prevent TVM from disconnecting
                save_model_name = export_onnx_model(self.model)
                _ = deploy_by_rpc(save_model_name)
                os.remove(save_model_name)
                '''
                print('{}Iteration{}'.format('\033[31m', '\033[0m'))
                print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
                print('Iteration {}{}{} Target_Rate: {}{}{}'.format('\033[32m', iteration_count, '\033[0m', '\033[32m', self.conv_target_rate, '\033[0m'))
                iteration_count += 1
                if (self.search_runtime['conv_' + key] == tmp_VBMF_layer_runtime['conv_' + key]):
                    # No headroom beyond VBMF: reuse the VBMF ranks directly.
                    ranks = [self.VBMF_layer_rank['conv_' + key][0], self.VBMF_layer_rank['conv_' + key][1]]
                    b_iter = 0
                else:
                    ranks, b_iter = self.conv_bayesian([conv_layer_to_decompose.in_channels, conv_layer_to_decompose.out_channels])
                self.bayesian_iter['conv_' + key] = b_iter
                #ranks = self.conv_bayesian([conv_layer_to_decompose.in_channels, conv_layer_to_decompose.out_channels])
                # Predicted runtimes of the Tucker-2 stack:
                # 1x1 reduce, KxK core, 1x1 expand.
                tmp_ms_1 = self.estimate_with_config([self.decomposed_layer_info['image_size'], \
                    conv_layer_to_decompose.in_channels, ranks[0], \
                    1, \
                    self.decomposed_layer_info['stride'], \
                    self.decomposed_layer_info['padding']])
                tmp_ms_2 = self.estimate_with_config([self.decomposed_layer_info['image_size'], \
                    ranks[0], ranks[1], \
                    self.decomposed_layer_info['kernel_size'], \
                    self.decomposed_layer_info['stride'], \
                    self.decomposed_layer_info['padding']])
                tmp_ms_3 = self.estimate_with_config([self.decomposed_layer_info['image_size'], \
                    ranks[1], conv_layer_to_decompose.out_channels, \
                    1, \
                    self.decomposed_layer_info['stride'], \
                    self.decomposed_layer_info['padding']])
                tmp_runtime_ms = tmp_ms_1 + tmp_ms_2 + tmp_ms_3
                print('Travis tmp_ms_1: {}, tmp_ms_2: {}, tmp_ms_3: {}'.format(tmp_ms_1, tmp_ms_2, tmp_ms_3))
                if (self.constrain == 0):
                    # Unconstrained: accept the first search result.
                    break
                else:
                    if ((tmp_runtime_ms) <= (self.search_runtime['conv_' + key] + self.remain_budget)):
                        print('Travis Constrain: {}, Search_runtime: {}, Layer_budget: {}, Remain_budget: {}'.format(self.constrain, \
                            tmp_runtime_ms, \
                            self.search_runtime['conv_' + key], self.remain_budget))
                        print('Travis Layer_budget + Remain_budget: {}'.format(self.search_runtime['conv_' + key] + self.remain_budget))
                        # Carry unused budget over to the next layer.
                        self.remain_budget = (self.search_runtime['conv_' + key] + self.remain_budget) - (tmp_runtime_ms)
                        assert self.remain_budget >= 0
                        print('Updated Remain Budget: {}'.format(self.remain_budget))
                        break
                    else:
                        # Over budget: raise the runtime pressure and retry.
                        print('Update the objective function ...')
                        if (last_search_time < tmp_runtime_ms):
                            beta += 0.5   # search got worse — increase the penalty step
                        else:
                            last_search_time = tmp_runtime_ms
                        if (iteration_count >= 10):
                            beta += 0.5
                        self.conv_target_rate += beta
                        print('Travis Constrain: {}, Search_runtime: {}, Layer_budget: {}, Remain_budget: {}'.format(self.constrain, \
                            tmp_runtime_ms, \
                            self.search_runtime['conv_' + key], self.remain_budget))
                        print('Travis Layer_budget + Remain_budget: {}'.format(self.search_runtime['conv_' + key] + self.remain_budget))
            print('{}Fine Tune{}'.format('\033[31m', '\033[0m'))
            print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
            # Swap rank order before decomposition — presumably to match the
            # order expected by the tucker helper; TODO confirm.
            ranks[0], ranks[1] = ranks[1], ranks[0]
            decompose = tucker_decomposition_conv_layer_without_rank(self.model.features._modules[key], ranks)
            self.model.features._modules[key] = decompose
            fine_tune(self.model, 10)
            fine_tune_test(self.model, self.testloader, True)
def fc_decomposition(self):
    """Decompose fully-connected classifier layers against per-layer budgets.

    Variant that budgets the runtime *increase over the VBMF baseline*
    (``tmp_runtime_ms - self.VBMF_layer_runtime[...]``) rather than absolute
    runtime. Retries the Bayesian rank search with a stiffer target rate
    (+0.1 per retry) until the increase fits the layer budget plus
    carried-over budget. Mutates self.model, self.fc_target_rate and
    self.remain_budget.
    """
    #remain_budget = 0.0
    N_classifier = len(self.model.classifier._modules.keys())
    for i, key in enumerate(self.model.classifier._modules.keys()):
        # Leave the last two classifier entries untouched.
        if i >= N_classifier - 2:
            break
        if isinstance(self.model.classifier._modules[key], torch.nn.modules.linear.Linear):
            fc_layer_to_decompose = self.model.classifier._modules[key]
            #print('Travis fc_layer_to_decompose rank1:{}, rank2:{}'.format(fc_layer_to_decompose.in_features, fc_layer_to_decompose.out_features))
            print('\n{}Layer Info{}'.format('\033[31m', '\033[0m'))
            print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
            print('Rank1:{}, Rank2:{}'.format(fc_layer_to_decompose.in_features, fc_layer_to_decompose.out_features))
            print(self.estimate(fc_layer_to_decompose, key))
            self.decomposed_layer_info['key'] = key
            self.fc_target_rate = 0.5
            iteration_count = 0
            while (True):
                print('{}Iteration{}'.format('\033[31m', '\033[0m'))
                print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
                print('Iteration {}{}{} Target_Rate: {}{}{}'.format('\033[32m', iteration_count, '\033[0m', '\033[32m', self.fc_target_rate, '\033[0m'))
                iteration_count += 1
                # Search bound: rank at which the factorized layer holds as
                # many weights as the original (in*out // (in+out)).
                rank = self.fc_bayesian(fc_layer_to_decompose.in_features * fc_layer_to_decompose.out_features // (fc_layer_to_decompose.in_features + fc_layer_to_decompose.out_features))
                # Predicted runtime of the two replacement Linear layers.
                tmp_ms_1 = self.estimate_with_config([fc_layer_to_decompose.in_features, rank[0]])
                tmp_ms_2 = self.estimate_with_config([rank[0], fc_layer_to_decompose.out_features])
                tmp_runtime_ms = tmp_ms_1 + tmp_ms_2
                # Searched runtime is expected to stay above the VBMF baseline.
                assert (tmp_runtime_ms - self.VBMF_layer_runtime['fc_' + key]) > 0
                if (self.constrain == 0 or (tmp_runtime_ms - self.VBMF_layer_runtime['fc_' + key]) <= (self.layer_budget['fc_' + key] + self.remain_budget)):
                    print('Travis Constrain: {}, Search_runtime: {}, Layer_budget: {}, Search_layer_runtime: {}, VBMF_layer_runtime: {}, Remain_budget: {}'.format(self.constrain, \
                        tmp_runtime_ms-self.VBMF_layer_runtime['fc_'+key], \
                        self.layer_budget['fc_'+key], tmp_runtime_ms, self.VBMF_layer_runtime['fc_'+key], self.remain_budget))
                    print('Travis Layer_budget + Remain_budget: {}'.format(self.layer_budget['fc_' + key] + self.remain_budget))
                    # Carry unused budget over to the next layer.
                    self.remain_budget = (self.layer_budget['fc_' + key] + self.remain_budget) - (tmp_runtime_ms - self.VBMF_layer_runtime['fc_' + key])
                    assert self.remain_budget >= 0
                    print('Updated Remain budget: {}'.format(self.remain_budget))
                    break
                else:
                    # Over budget: stiffen the search objective and retry.
                    print('Update the objective function ...')
                    self.fc_target_rate += 0.1
                    print('Travis Constrain: {}, Search_runtime: {}, Layer_budget: {}, Search_layer_runtime: {}, VBMF_layer_runtime: {}, Remain_budget: {}'.format(self.constrain, \
                        tmp_runtime_ms-self.VBMF_layer_runtime['fc_'+key], \
                        self.layer_budget['fc_'+key], tmp_runtime_ms, self.VBMF_layer_runtime['fc_'+key], self.remain_budget))
                    print('Travis Layer_budget + Remain_budget: {}'.format(self.layer_budget['fc_' + key] + self.remain_budget))
            decompose = tucker_decomposition_fc_layer_without_rank(self.model.classifier._modules[key], rank)
            self.model.classifier._modules[key] = decompose
            fine_tune(self.model, 10)
            fine_tune_test(self.model, self.testloader, True)
def conv_decomposition(self):
    """Tucker-decompose conv feature layers against per-layer budgets.

    Variant that budgets the runtime *increase over the VBMF baseline*
    (``tmp_runtime_ms - self.VBMF_layer_runtime[...]``). Retries the
    Bayesian rank search with a stiffer target rate (+0.05 per retry) until
    the increase fits the layer budget plus carried-over budget. Mutates
    self.model, self.conv_target_rate and self.remain_budget.
    """
    #remain_budget = 0.0
    for i, key in enumerate(self.model.features._modules.keys()):
        # The first feature layer is kept undecomposed.
        if (i == 0):
            continue
        if isinstance(self.model.features._modules[key], torch.nn.modules.conv.Conv2d):
            conv_layer_to_decompose = self.model.features._modules[key]
            #print('\nTravis conv_layer_to_decompose rank1:{}, rank2:{}'.format(conv_layer_to_decompose.in_channels, conv_layer_to_decompose.out_channels))
            print('\n{}Layer Info{}'.format('\033[31m', '\033[0m'))
            print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
            print('Rank1:{}, Rank2:{}'.format(conv_layer_to_decompose.in_channels, conv_layer_to_decompose.out_channels))
            #print(self.estimate(conv_layer_to_decompose, key))
            # Record layer geometry for the rank-search objective.
            self.decomposed_layer_info['key'] = key
            self.decomposed_layer_info['image_size'] = self.model_image_size[key][0]
            self.decomposed_layer_info['kernel_size'] = conv_layer_to_decompose.kernel_size[0]
            self.decomposed_layer_info['stride'] = conv_layer_to_decompose.stride[0]
            self.decomposed_layer_info['padding'] = conv_layer_to_decompose.padding[0]
            self.conv_target_rate = 0.5
            iteration_count = 0
            while (True):
                print('{}Iteration{}'.format('\033[31m', '\033[0m'))
                print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
                print('Iteration {}{}{} Target_Rate: {}{}{}'.format('\033[32m', iteration_count, '\033[0m', '\033[32m', self.conv_target_rate, '\033[0m'))
                iteration_count += 1
                ranks = self.conv_bayesian([conv_layer_to_decompose.in_channels, conv_layer_to_decompose.out_channels])
                # Predicted runtimes of the Tucker-2 stack:
                # 1x1 reduce, KxK core, 1x1 expand.
                tmp_ms_1 = self.estimate_with_config([self.decomposed_layer_info['image_size'], \
                    conv_layer_to_decompose.in_channels, ranks[0], \
                    1, \
                    self.decomposed_layer_info['stride'], \
                    self.decomposed_layer_info['padding']])
                tmp_ms_2 = self.estimate_with_config([self.decomposed_layer_info['image_size'], \
                    ranks[0], ranks[1], \
                    self.decomposed_layer_info['kernel_size'], \
                    self.decomposed_layer_info['stride'], \
                    self.decomposed_layer_info['padding']])
                tmp_ms_3 = self.estimate_with_config([self.decomposed_layer_info['image_size'], \
                    ranks[1], conv_layer_to_decompose.out_channels, \
                    1, \
                    self.decomposed_layer_info['stride'], \
                    self.decomposed_layer_info['padding']])
                tmp_runtime_ms = tmp_ms_1 + tmp_ms_2 + tmp_ms_3
                print('Travis tmp_ms_1: {}, tmp_ms_2: {}, tmp_ms_3: {}'.format(tmp_ms_1, tmp_ms_2, tmp_ms_3))
                '''
                print('Travis tmp_ms_1: {}, image_size: {}, in_channel: {}, out_channel: {}, kernel_size: {}, stride: {}, padding: {}'.format( \
                    tmp_ms_1, self.decomposed_layer_info['image_size'], conv_layer_to_decompose.in_channels, ranks[0], \
                    1, self.decomposed_layer_info['stride'], self.decomposed_layer_info['padding']))
                print('Travis tmp_ms_2: {}, image_size: {}, in_channel: {}, out_channel: {}, kernel_size: {}, stride: {}, padding: {}'.format( \
                    tmp_ms_2, self.decomposed_layer_info['image_size'], ranks[0], ranks[1], \
                    self.decomposed_layer_info['kernel_size'], self.decomposed_layer_info['stride'], self.decomposed_layer_info['padding']))
                print('Travis tmp_ms_3: {}, image_size: {}, in_channel: {}, out_channel: {}, kernel_size: {}, stride: {}, padding: {}'.format( \
                    tmp_ms_3, self.decomposed_layer_info['image_size'], ranks[1], conv_layer_to_decompose.out_channels, \
                    1, self.decomposed_layer_info['stride'], self.decomposed_layer_info['padding']))
                '''
                # Searched runtime is expected to stay above the VBMF baseline.
                assert (tmp_runtime_ms - self.VBMF_layer_runtime['conv_' + key]) > 0
                if (self.constrain == 0 or (tmp_runtime_ms - self.VBMF_layer_runtime['conv_' + key]) <= (self.layer_budget['conv_' + key] + self.remain_budget)):
                    print('Travis Constrain: {}, Search_runtime: {}, Layer_budget: {}, Search_layer_runtime: {}, VBMF_layer_runtime: {}, Remain_budget: {}'.format(self.constrain, \
                        tmp_runtime_ms-self.VBMF_layer_runtime['conv_'+key], \
                        self.layer_budget['conv_'+key], tmp_runtime_ms, self.VBMF_layer_runtime['conv_'+key], self.remain_budget))
                    print('Travis Layer_budget + Remain_budget: {}'.format(self.layer_budget['conv_' + key] + self.remain_budget))
                    # Carry unused budget over to the next layer.
                    self.remain_budget = (self.layer_budget['conv_' + key] + self.remain_budget) - (tmp_runtime_ms - self.VBMF_layer_runtime['conv_' + key])
                    assert self.remain_budget >= 0
                    print('Updated Remain Budget: {}'.format(self.remain_budget))
                    break
                else:
                    # Over budget: stiffen the search objective and retry.
                    print('Update the objective function ...')
                    self.conv_target_rate += 0.05
                    print('Travis Constrain: {}, Search_runtime: {}, Layer_budget: {}, Search_layer_runtime: {}, VBMF_layer_runtime: {}, Remain_budget: {}'.format(self.constrain, \
                        tmp_runtime_ms-self.VBMF_layer_runtime['conv_'+key], \
                        self.layer_budget['conv_'+key], tmp_runtime_ms, self.VBMF_layer_runtime['conv_'+key], self.remain_budget))
                    print('Travis Layer_budget + Remain_budget: {}'.format(self.layer_budget['conv_' + key] + self.remain_budget))
            print('{}Fine Tune{}'.format('\033[31m', '\033[0m'))
            print('{}-----------------------------------------{}'.format('\033[94m', '\033[0m'))
            # Swap rank order before decomposition — presumably to match the
            # order expected by the tucker helper; TODO confirm.
            ranks[0], ranks[1] = ranks[1], ranks[0]
            decompose = tucker_decomposition_conv_layer_without_rank(self.model.features._modules[key], ranks)
            self.model.features._modules[key] = decompose
            fine_tune(self.model, 10)
            fine_tune_test(self.model, self.testloader, True)
def inference(hps):
    """Run decoding/inference for a summarization model and score it with ROUGE.

    Flow: build the result directory tree, optionally short-circuit to
    evaluation only, construct the model + batcher, build the inference ops
    for the detected inference type (single / two-step / three-step), decode
    every batch to disk (with resume support via `all_batch_already_decoded`),
    then compute ROUGE for each stage directory and fine-tune the predictions.

    NOTE(review): `hps` is assumed to be a hyper-parameter namespace
    (tf-style HParams or similar) — confirm against the caller.
    """
    # Prepare dir
    print('test_iterate', hps.test_iterate)
    print('use_diverse_beam_search', hps.use_diverse_beam_search)
    print('write_all_beam', hps.write_all_beam)
    print('top_beams', hps.top_beams)
    print('beam_width', hps.beam_size)
    # Results go under a per-checkpoint directory named after the input file
    # and the iteration number being evaluated.
    if hps.mode == 'test':
        result_dir = os.path.join(
            hps.output_dir, hps.test_file + '-results-%d/' % hps.test_iterate)
    else:
        result_dir = os.path.join(
            hps.output_dir, hps.dev_file + '-results-%d/' % hps.test_iterate)
    # Directory layout: 'ref' holds gold summaries, 'pred' the final decoded
    # output; '_1'/'_2' hold intermediate stages for multi-step inference;
    # 'trunc_pred' holds truncated predictions (used for the NYT task).
    ref_dir, decode_dir = os.path.join(result_dir, 'ref'), os.path.join(
        result_dir, 'pred')
    decode_dir_pred = os.path.join(result_dir, 'pred')
    collected_file = os.path.join(result_dir, 'all.txt')
    dec_dir_stage_1, dec_dir_stage_2 = decode_dir + '_1', decode_dir + '_2'
    trunc_dec_dir = os.path.join(result_dir, 'trunc_pred')
    if not os.path.exists(ref_dir):
        os.makedirs(ref_dir)
    if not os.path.exists(decode_dir):
        os.makedirs(decode_dir)
    if not os.path.exists(dec_dir_stage_1):
        os.makedirs(dec_dir_stage_1)
    if not os.path.exists(dec_dir_stage_2):
        os.makedirs(dec_dir_stage_2)
    if not os.path.exists(trunc_dec_dir):
        os.makedirs(trunc_dec_dir)
    # Task-specific splitter that turns a raw abstract into sentences.
    abs2sents_func = abstract2sents_func(hps)
    if hps.eval_only:
        # Evaluation-only mode: score existing predictions and exit without
        # building the model. NYT uses the truncated predictions for ROUGE.
        print('calculate rouge...')
        final_pred_dir = trunc_dec_dir if hps.task_name == 'nyt' else decode_dir
        results_dict = rouge_eval(ref_dir, final_pred_dir)
        rouge_log(results_dict, decode_dir)
        fine_tune(hps)
        exit()
    # Load configs
    bert_config = modeling.BertConfig.from_json_file(hps.bert_config_file)
    # Make sure position embeddings cover the generated sequence length.
    bert_config.max_position_embeddings = max(
        bert_config.max_position_embeddings, hps.max_out_seq_length * 2)
    model = model_pools[hps.model_name]
    processor = processors[hps.task_name.lower()](hps)
    validate_batcher = Batcher(processor, hps)
    # Build model graph
    print("create inference model...")
    dev_model = model(bert_config, validate_batcher, hps)
    dev_model.create_or_load_recent_model()
    print("inference model done")
    # Prepare counters: idx indexes output files (resume-aware),
    # skipped_num counts already-decoded samples, results_num decoded ones.
    results_num = 0
    idx, skipped_num = 0, 0
    infer_type = determine_infer_type(dev_model)
    # build inference graph
    logging.info('Build inference graph...')
    print('Build inference graph...')
    pred_seq, _ = create_infer_op(dev_model, hps)
    # Extra ops only exist for multi-step inference types.
    fine_tuned_seq = create_infer_op_2(
        dev_model, hps.use_beam_search
    ) if infer_type == InferType.two_step or infer_type == InferType.three_step else None
    sent_fine_tune = create_infer_op_sent(
        dev_model) if infer_type == InferType.three_step else None
    logging.info('Start inference...')
    print('Start inference...')
    res_dirs = []
    while True:
        # predict one batch
        batch = dev_model.batcher.next_batch()
        if not batch:
            break
        # Resume support: skip batches whose outputs are already on disk.
        if all_batch_already_decoded(ref_dir, decode_dir, idx,
                                     len(batch.source_ids)):
            idx += len(batch.source_ids)
            skipped_num += len(batch.source_ids)
            continue
        # inference ids seq — dispatch on the model's inference type; each
        # branch also fixes the list of stage directories to score later.
        if infer_type == InferType.single:
            ids_results, ids_all_candidates = single_stage_model_inference(
                dev_model, pred_seq, batch, hps)
            res_dirs = [decode_dir]
        elif infer_type == InferType.three_step:
            ids_results, ids_all_candidates = three_stage_model_inference(
                dev_model, pred_seq, fine_tuned_seq, sent_fine_tune, batch,
                hps)
            res_dirs = [dec_dir_stage_1, dec_dir_stage_2, decode_dir]
        else:
            ids_results, ids_all_candidates = two_stage_model_inference(
                dev_model, pred_seq, fine_tuned_seq, batch, hps)
            res_dirs = [dec_dir_stage_1, decode_dir]
        # convert to string
        decode_result = [
            decode_target_ids(each_seq_ids, batch, hps)
            for each_seq_ids in [ids_results]
        ]
        # Also decode the top-n beam candidates for optional dumping below.
        n_top = hps.top_beams
        all_candidates = []
        for i in range(n_top):
            all_candidates.append(
                [decode_target_ids(ids_all_candidates[i], batch, hps)])
        results_num += batch.true_num
        # save ref and label
        batch_summaries = [[
            sent.strip() for sent in abs2sents_func(each.summary)
        ] for each in batch.original_data]
        idx = write_batch_for_rouge(batch_summaries, decode_result, idx,
                                    ref_dir, res_dirs, trunc_dec_dir, hps,
                                    batch.true_num, batch.original_data,
                                    decode_dir_pred, collected_file)
        if hps.write_all_beam:
            write_all_beam_candidates(batch.original_data, all_candidates,
                                      n_top, result_dir, batch.true_num, hps,
                                      idx)
        logging.info("Finished sample %d" % (results_num + skipped_num))
    logging.info('Start calculate ROUGE...')
    print('Start calculate ROUGE...')
    # calculate rouge and other metrics — score every intermediate stage,
    # then the final prediction directory (truncated variant for NYT).
    for i in range(len(res_dirs) - 1):
        results_dict = rouge_eval(ref_dir, res_dirs[i])
        rouge_log(results_dict, res_dirs[i])
    final_pred_dir = trunc_dec_dir if hps.task_name == 'nyt' else decode_dir
    results_dict = rouge_eval(ref_dir, final_pred_dir)
    rouge_log(results_dict, decode_dir)
    logging.info('Start fine tune the predictions...')
    fine_tune(hps)
def conv_decomposition(self):
    """Tucker-decompose every Conv2d in ``self.model.features`` except the first.

    For each eligible conv layer: record its geometry in
    ``self.decomposed_layer_info``, search ranks with Bayesian optimisation
    (``self.conv_bayesian``), estimate the runtime of the three resulting
    stages, replace the layer with its decomposed form, and fine-tune.
    Mutates ``self.model``, ``self.decomposed_layer_info``,
    ``self.conv_target_rate`` and ``self.bayesian_iter`` in place.
    """
    VBMF_model = torch.load('checkpoint/VBMF_alexnet_model')
    # Predicted per-layer runtime of the VBMF reference model; kept for
    # parity with the original flow (not consumed later in this method).
    _, tmp_VBMF_layer_runtime = self.get_model_predict_runtime(VBMF_model)
    for idx, (key, layer) in enumerate(self.model.features._modules.items()):
        # The very first feature layer is never decomposed.
        if idx == 0:
            continue
        if not isinstance(layer, torch.nn.modules.conv.Conv2d):
            continue
        print('\n{}Layer Info{}'.format('\033[31m', '\033[0m'))
        print('{}-----------------------------------------{}'.format(
            '\033[94m', '\033[0m'))
        print('Rank1:{}, Rank2:{}'.format(layer.in_channels,
                                          layer.out_channels))
        # Publish the layer geometry so the Bayesian search and the runtime
        # estimator can read it from instance state.
        info = self.decomposed_layer_info
        info['key'] = key
        info['image_size'] = self.model_image_size[key][0]
        info['kernel_size'] = layer.kernel_size[0]
        info['stride'] = layer.stride[0]
        info['padding'] = layer.padding[0]
        self.conv_target_rate = 0.5
        ranks, b_iter = self.conv_bayesian(
            [layer.in_channels, layer.out_channels])
        self.bayesian_iter['conv_' + key] = b_iter
        # Tucker-2 yields three convolutions:
        #   1x1 (in -> ranks[0]), KxK (ranks[0] -> ranks[1]), 1x1 (ranks[1] -> out).
        tmp_ms_1 = self.estimate_with_config([
            info['image_size'], layer.in_channels, ranks[0], 1,
            info['stride'], info['padding']
        ])
        tmp_ms_2 = self.estimate_with_config([
            info['image_size'], ranks[0], ranks[1], info['kernel_size'],
            info['stride'], info['padding']
        ])
        tmp_ms_3 = self.estimate_with_config([
            info['image_size'], ranks[1], layer.out_channels, 1,
            info['stride'], info['padding']
        ])
        tmp_runtime_ms = tmp_ms_1 + tmp_ms_2 + tmp_ms_3
        print('Travis tmp_ms_1: {}, tmp_ms_2: {}, tmp_ms_3: {}'.format(
            tmp_ms_1, tmp_ms_2, tmp_ms_3))
        print('{}Fine Tune{}'.format('\033[31m', '\033[0m'))
        print('{}-----------------------------------------{}'.format(
            '\033[94m', '\033[0m'))
        # The decomposition helper expects the ranks in swapped order.
        ranks[0], ranks[1] = ranks[1], ranks[0]
        self.model.features._modules[key] = \
            tucker_decomposition_conv_layer_without_rank(
                self.model.features._modules[key], ranks)
        fine_tune(self.model, 10)
        fine_tune_test(self.model, self.testloader, True)