def quantize_and_run_any(network, qbits):
    """Quantize the stock checkpoint of `network`, then evaluate the result.

    Args:
        network: Network name; it is spliced into the checkpoint path, the
            layer-definition path and the output pickle path.
        qbits: Bit-width specification, handed unchanged to
            `quantize_network`.

    Returns:
        The first element of the pair returned by `eval_imagenet`.
    """
    rule = '=================================================================='
    # Only labels the output file name; the widths actually applied are `qbits`.
    nbits = 10

    print('network:', network)
    ckpt_path = ('./nn_quant_and_run_code/rlbitwidth.tfmodels/caffe2tf/tfmodels/'
                 + network + '/' + network + '.ckpt')

    # ---- Quantization ----
    print(rule)
    print('Quantization')
    print(rule)
    out_dir = './nn_quant_and_run_code/results/quantized/' + network + '/'
    quantized_path = (out_dir + network + '_layers_quant_' + str(nbits)
                      + '-bits_date.pickle')
    layer_names = load.get_layers(
        './nn_quant_and_run_code_train/rlbitwidth.tfmodels/caffe2tf/tfmodels/'
        + network + '/' + network + '.py')
    quantize_network(ckpt_path, layer_names, quantized_path, qbits)

    # ---- Inference on the freshly written quantized parameters ----
    print(rule)
    print('INFERENCE')
    print(rule)
    with tf.Graph().as_default():
        # `shift_back` is read from module scope.
        acc, _netparams = eval_imagenet(
            network, quantized_path, shift_back,
            trainable=False, err_mean=None, err_stddev=None,
            train_vars=None, cost_factor=0., n_epoch=1)
    return acc
def run_inference(network, input_param_path, qbits):
    """Quantize the parameters at `input_param_path` and evaluate them.

    Paths are rooted at the module-level `HOME`.

    Args:
        network: Network name used to build the output and layer-file paths.
        input_param_path: Parameter file handed to `quantize_network`.
        qbits: Bit-width specification, forwarded to both `quantize_network`
            and `eval_imagenet`.

    Returns:
        The first element of the pair returned by `eval_imagenet`.
    """
    rule = '=================================================================='

    # ---- Quantization ----
    print(rule)
    print('Quantization')
    print(rule)
    # SECOND file: quantized after retraining
    quantized_path = (HOME + '/nn_quant_and_run_code/results/quantized/'
                      + network + '/'
                      + network + 'train_1_test_retrained_quantized.pickle')
    layer_names = load.get_layers(
        HOME + '/nn_quant_and_run_code_train/rlbitwidth.tfmodels/caffe2tf/tfmodels/'
        + network + '/' + network + '.py')
    quantize_network(input_param_path, layer_names, quantized_path, qbits)

    # ---- Inference ----
    print(rule)
    print('INFERENCE')
    print(rule)
    with tf.Graph().as_default():
        acc, _netparams = eval_imagenet(
            network,
            quantized_path,  # param_path
            '',              # param_q_path
            qbits,
            0,               # layer_index
            0,               # layer_name
            0,               # file_idx
            {},              # shift_back
            trainable=False, err_mean=None, err_stddev=None,
            train_vars=None, cost_factor=0., n_epoch=1)
    return acc
def get_stats(network_name):
    """Collect per-layer shape and weight-spread statistics for a network.

    Args:
        network_name: Network name used to locate the `.ckpt` parameters and
            the `.py` layer definition.

    Returns:
        A pair `(df, std_lst)`: `df` is a DataFrame with columns
        `layer_idx_norm`, `n`, `c`, `k`, `std` (one row per layer) and
        `std_lst` is the list of the `std` values in layer order.
        Access a single value with `df.loc[i, 'std']`.
    """
    model_dir = ('./nn_quant_and_run_code_train/rlbitwidth.tfmodels/caffe2tf/tfmodels/'
                 + network_name + '/')

    # get weights
    weights = load.get_netparams(model_dir + network_name + '.ckpt')['weights']
    # get layers
    layers_sorted = load.get_layers(model_dir + network_name + '.py')
    layer_count = len(layers_sorted)

    rows = []
    for position, layer in enumerate(layers_sorted, start=1):
        shape = weights[layer].shape
        if len(shape) == 2:
            # 2-D weights: no kernel dimension, recorded as k = 0.
            k = 0
            n, c = shape
        else:
            k, _, n, c = shape
        flat = weights[layer].ravel()
        # NOTE(review): stored under the name 'std', but the value is np.var.
        spread = np.var(flat)
        rows.append([position / layer_count, n, c, k, spread])

    df = pd.DataFrame(rows, columns=['layer_idx_norm', 'n', 'c', 'k', 'std'])
    std_lst = [row[-1] for row in rows]
    return df, std_lst
def run_inference(network, input_param_path, qbits):
    """Quantize the parameters at `input_param_path` and evaluate them.

    This variant uses paths relative to the working directory and reads
    `shift_back` from module scope.

    Args:
        network: Network name used to build the output and layer-file paths.
        input_param_path: Parameter file handed to `quantize_network`.
        qbits: Bit-width specification forwarded to `quantize_network`.

    Returns:
        The first element of the pair returned by `eval_imagenet`.
    """
    rule = '=================================================================='

    # ---- Quantization ----
    print(rule)
    print('Quantization')
    print(rule)
    quantized_path = ('../nn_quant_and_run_code/results/quantized/'
                      + network + '/'
                      + network + '_retrained_quantized.pickle')
    layer_names = load.get_layers(
        './rlbitwidth.tfmodels/caffe2tf/tfmodels/' + network + '/' + network + '.py')
    quantize_network(input_param_path, layer_names, quantized_path, qbits)

    # ---- Inference on the quantized file written above ----
    print(rule)
    print('INFERENCE')
    print(rule)
    with tf.Graph().as_default():
        acc, _netparams = eval_imagenet(
            network, quantized_path, shift_back,
            trainable=False, err_mean=None, err_stddev=None,
            train_vars=None, cost_factor=0., n_epoch=1)
    return acc
def quantize_and_train(network_name, layer_index, layer_name, qbits, idx,
                       init_params, file_idx):
    """Quantize `init_params`, retrain against the quantized copy, save result.

    Paths are rooted at the module-level `HOME`; `shift_back` is also read
    from module scope.

    Args:
        network_name: Network name used in every path.
        layer_index: Forwarded to `eval_imagenet`.
        layer_name: Forwarded to `eval_imagenet`.
        qbits: Bit-width specification for `quantize_network` and
            `eval_imagenet`.
        idx: Appended to the date tag that names the output pickles.
        init_params: Parameter file used both as the quantization input and
            as the retraining initialization.
        file_idx: Forwarded to `eval_imagenet`.

    Returns:
        The first element of the pair returned by `eval_imagenet`.
    """
    rule = '=================================================================='
    tag = '110618' + '_' + str(idx)
    results_dir = (HOME + '/nn_quant_and_run_code/results/quantized/'
                   + network_name + '/')

    # ---- Quantization: initialize from the quantized input pattern ----
    print(rule)
    print('Quantization')
    print(rule)
    quantized_path = (results_dir + 'quantized_' + network_name
                      + '_layers_' + tag + '.pickle')
    layer_names = load.get_layers(
        HOME + '/nn_quant_and_run_code_train/rlbitwidth.tfmodels/caffe2tf/tfmodels/'
        + network_name + '/' + network_name + '.py')
    # Always start from the parameters the caller supplied.
    quantize_network(init_params, layer_names, quantized_path, qbits)

    # ---- Retraining ----
    print(rule)
    print('TRAINING')
    print(rule)
    # `init_params` seeds the retraining; the quantized file is passed along
    # so the quantization-difference regularizer can be computed.
    _, std = get_stats(network_name)
    # DEBUG:
    print('std : ####################')
    print(std)
    with tf.Graph().as_default():
        acc, netparams = eval_imagenet(
            network_name, init_params, quantized_path, qbits, std,
            layer_index, layer_name, file_idx, shift_back,
            trainable=True, err_mean=None, err_stddev=None,
            train_vars=None, cost_factor=100., n_epoch=1)
    print(acc)

    # ---- Persist the retrained parameters ----
    retrained_path = results_dir + 'retrained_' + tag + '.pickle'
    with open(retrained_path, 'wb') as out:
        pickle.dump(netparams, out)

    print(rule)
    print('Re-TRAINING DONE!')
    print(rule)
    return acc
def quantize_and_train(qbits):
    """Quantize the stock alexnet checkpoint, retrain, and pickle the result.

    All paths are hard-coded for alexnet; `shift_back` is read from module
    scope.

    Args:
        qbits: Bit-width specification passed to `quantize_network` and to
            `eval_imagenet`.

    Returns:
        None. The retrained parameters are written to disk.
    """
    rule = '=================================================================='
    ckpt_path = './rlbitwidth.tfmodels/caffe2tf/tfmodels/alexnet/alexnet.ckpt'

    # ---- Quantization: initialize based on the quantized input pattern ----
    print(rule)
    print('Quantization')
    print(rule)
    quantized_path = ('../nn_quant_and_run_code/results/quantized/alexnet/'
                      'train_1_init_alexnet_layers_quant_16Oct.pickle')
    layer_names = load.get_layers(
        './rlbitwidth.tfmodels/caffe2tf/tfmodels/alexnet/alexnet.py')
    quantize_network(ckpt_path, layer_names, quantized_path, qbits)

    # ---- Training: initialized from the full-precision checkpoint ----
    print(rule)
    print('TRAINING')
    print(rule)
    retrained_path = ('../nn_quant_and_run_code/results/quantized/alexnet/alexnet'
                      'train_1_layers_quant_retrained_16Oct.pickle')
    acc, netparams = eval_imagenet(
        'alexnet', ckpt_path, qbits, shift_back,
        trainable=True, err_mean=None, err_stddev=None,
        train_vars=None, cost_factor=75., n_epoch=1)
    print(acc)

    with open(retrained_path, 'wb') as out:
        pickle.dump(netparams, out)

    print(rule)
    print('TRAINING DONE!')
    print(rule)
def run_network(net_name, param_path, qbits, istrain, cost_factor, n_epoch):
    """Optionally quantize a parameter file, then evaluate the network.

    Fixes relative to the previous version:
      * The non-lenet branch passed `ckpt_path`, which was only bound when
        `param_path` was falsy; supplying a `param_path` raised
        UnboundLocalError. The source path is now resolved once and used there.
      * The lenet branch read `path_save_q`, which only exists after
        quantization; it now uses `path`, which equals `path_save_q` when
        quantization ran and the source path otherwise.

    Args:
        net_name: Network name; 'lenet' selects `eval_lenet`, anything else
            selects `eval_imagenet`.
        param_path: Parameter file to start from. When falsy, the default
            checkpoint under `CKPT_PATH` is used.
        qbits: Bit-width specification. When truthy, the parameters are
            quantized first (with lenet layer definitions).
        istrain: Passed as `trainable` to `eval_imagenet`.
        cost_factor: Passed to `eval_imagenet`.
        n_epoch: Passed to whichever evaluation function runs.

    Returns:
        Whatever `eval_lenet` or `eval_imagenet` returns.
    """
    rule = '=================================================================='

    # Resolve the input parameters once so every later branch has a bound name.
    if param_path:
        source_path = param_path
    else:
        source_path = CKPT_PATH + net_name + '/' + net_name + '.ckpt'
    path = source_path

    err_mean = [0.0, 0.0, 0.0, 0.0]  # order: input, weights, biases, layers
    err_stddev = [0.0, 0.0, 0.0, 0.0]
    train_vars = [False, True, False, False]

    if qbits:
        # ---- Quantization ----
        print(rule)
        print('Quantization')
        print(rule)
        # NOTE: the quantization step is hard-wired to the lenet layer file.
        network = 'lenet'
        path_save_q = ('./quantized_models/' + network + '/'
                       + network + 'train_1_test_retrained_quantized.pickle')
        layers_sorted = load.get_layers(
            './rlbitwidth.tfmodels/caffe2tf/tfmodels/' + network + '/' + network + '.py')
        print('lenet #layers = ', len(layers_sorted))
        quantize_network(path, layers_sorted, path_save_q, qbits)
        path = path_save_q

    print(rule)
    print('INFERENCE')
    print(rule)

    if net_name == 'lenet':
        return eval_lenet(net_name=net_name, param_path=path, qbits=[],
                          layer_index=0, trainable=False, n_epoch=n_epoch)

    # As before, the imagenet path evaluates the unquantized source parameters.
    return eval_imagenet(net_name, source_path, trainable=istrain,
                         err_mean=err_mean, err_stddev=err_stddev,
                         train_vars=train_vars, cost_factor=cost_factor,
                         n_epoch=n_epoch)
def retrain(network_name, episode_num, layer_index, qbits):
    """Run one validate / retrain / validate cycle for a layer.

    Intended workflow, as described by the original author:
      1. read the initial model (or the one from the previous iteration)
      2. from RL: read the layer index and the number of bits
      3. quantize, starting either from full precision (at the start of an
         episode) or from the most recent retrained model
      4. compute the quantization error of the input layer
      5. add this quantization error to the objective function
      6. initialize, fix previous layers (except first and last), retrain
      7. (caching) assume independent retraining for independent episodes

    Args:
        network_name: Network name used in every path.
        episode_num: Not used by the current body.
        layer_index: Index into the sorted layer list; also forwarded to
            `quantize_and_train`.
        qbits: Bit-width specification forwarded to the helpers.

    Returns:
        `(acc_val, acc_retrain, acc_retrain_val)`: the results of the first
        `run_inference`, of `quantize_and_train`, and of the second
        `run_inference`, in that order.
    """
    global file_idx

    hashes = '##########################################################'
    results_prefix = (HOME + '/nn_quant_and_run_code/results/quantized/'
                      + network_name + '/' + network_name)
    model_prefix = (HOME + '/nn_quant_and_run_code_train/rlbitwidth.tfmodels/caffe2tf/tfmodels/'
                    + network_name + '/' + network_name)

    # At the first layer the image-subset index is reset to 1; otherwise the
    # existing module-level `file_idx` is reused.
    if layer_index == 1:
        file_idx = 1

    layers_sorted = load.get_layers(model_prefix + '.py')
    layer_name = layers_sorted[layer_index]

    # Accelerated fine-tuning: initialization always comes from the
    # full-precision checkpoint, not from the most recent retrained pickle.
    init_params = model_prefix + '.ckpt'

    # Validation accuracy before fine-tuning.
    print('[a] validation ACC before retraining')
    print(hashes)
    acc_val = run_inference(network_name, init_params, qbits)
    print(acc_val)

    print('[b] retraining ACC')
    print(hashes)
    # NOTE(review): six positional arguments are passed here; confirm this
    # matches the `quantize_and_train` signature actually in scope.
    acc_retrain = quantize_and_train(network_name, layer_index, layer_name,
                                     qbits, init_params, file_idx)

    print('[c] validation after retraining')
    print(hashes)
    # FIRST file: retrained file
    retrained_params = results_prefix + '_train_1_layers_quant_retrained_JAN_RL.pickle'
    acc_retrain_val = run_inference(network_name, retrained_params, qbits)

    return acc_val, acc_retrain, acc_retrain_val
# ---------------------------------------------------------------------------
# Experiment configuration (module level)
# ---------------------------------------------------------------------------

# Weight pickles from the alexnet conv2 retraining experiments.
Wo_bar_q = '/home/behnam/results/weights_retrained/alexnet_conv2_7.pickle'  # {Wo}'q (quantized version), later used to check testing acc
W1_q = '/home/behnam/results/weights_retrained/alexnet_conv2_7_W1q.pickle'
Wo_bar = '/home/behnam/results/weights_retrained/alexnet_conv2_retrained_test.pickle'  # = {Wo}'
W1 = '/home/behnam/results/weights_retrained/alexnet_conv2_retrained_test_f.pickle'  # = {W1}

# Pretrained full-precision checkpoints.
Wo_alexnet = '/home/behnam/rlbitwidth.tfmodels/caffe2tf/tfmodels/alexnet/alexnet.ckpt'
Wo_resent18 = '/home/behnam/rlbitwidth.tfmodels/caffe2tf/tfmodels/resnet18/resnet18.ckpt'

# Re-training output. An earlier value, '.../weights_retrained/test.pickle'
# (re-training using the latest fc layers), was superseded by this one.
test = '/home/behnam/results/weights_retrained/test_resnet_200k.pickle'

input_file = Wo_resent18
output_file = test

layers_sorted = load.get_layers(
    '/home/behnam/rlbitwidth.tfmodels/caffe2tf/tfmodels/resnet18/resnet18.py')

# Every layer starts with a shift-back of zero.
shift_back = dict.fromkeys(layers_sorted, 0)

# ===== FOR TRAINING ====================
# Phase I  - retrain a little on the new 40K dataset to get Wo'
#            (would start from the alexnet checkpoint, {Wo}).
# Phase II - retrain to minimize the quantization error and get W1.
path_save = '/home/behnam/results/quantized/resnet18/resnet18'
path_save_q = path_save + '_layers_shift_quant_10May.pickle'
param_path = Wo_resent18
# Final value of save_path_params; an earlier assignment pointing at the
# {W1} pickle is overwritten by this one.
save_path_params = path_save + '_layers_shift_quant_retrain-A_10May.pickle'
# NOTE(review): this span reads `params` and `path_net`, neither of which is
# defined in the visible code; it looks like an excerpt of a larger routine.
# Unpack the network parameters, which arrive either as a dict keyed by
# parameter kind or as a sequence ordered weights, biases, mean, variance,
# scale, offset.
if type(params) is dict:
    weights_, biases_ = params['weights'], params['biases']
    # More than two entries means the four extra parameter groups are present.
    if len(params) > 2:
        mean_, variance_, scale_, offset_ = params['mean'], params[
            'variance'], params['scale'], params['offset']
    else:
        mean_, variance_, scale_, offset_ = {}, {}, {}, {}
else:
    weights_, biases_ = params[0:2]
    if len(params) > 2:
        mean_, variance_, scale_, offset_ = params[2:6]
    else:
        mean_, variance_, scale_, offset_ = {}, {}, {}, {}
# reading the layers and determining the #bits per layer
layers = load.get_layers(path_net)
bits_q = [5] * len(layers)
# Same value as the default above; raises ValueError if 'conv2' is not a layer.
bits_q[layers.index('conv2')] = 5
weights_q = {}
# SNR and MSE are created here but not filled in within the visible span.
SNR = {}
MSE = {}
MU_layers = [11, 41, 81, 61, 41, 17, 25, 11]
for i in range(0, len(layers)):
    # The per-layer table value is looked up and then overridden by 0.5
    # (the lookup still raises IndexError past the eighth layer).
    mu = MU_layers[i]
    mu = 0.5
    #x = weights_[layers[i]].ravel()
    x = weights_[layers[i]]
    # The measured maximum is likewise overridden by a fixed 1.
    Xmax = np.amax(x)
    Xmax = 1
    weights_q[layers[i]] = mu_law_quantize(x, bits_q[i], mu, Xmax)