def get_logits(data="cifar10", output="c10"):
    model_raw, ds_fetcher, is_imagenet = selector.select(data, cuda=False)
    ds_val = ds_fetcher(batch_size=100, train=False, val=True)
    outputs = []
    targets = []
    model_raw.eval()
    print("Start generating logits")
    for idx, (data, target) in enumerate(ds_val):
        data = Variable(torch.FloatTensor(data))  # .cuda()
        outputs.append(model_raw(data).detach().numpy())
        targets.append(target.detach().numpy())
        if idx % 10 == 0:
            print("Iteration:", idx)
    print("Saving logits")
    with open("logits_%s_pretrained.p" % output, "wb") as f:
        pickle.dump((np.concatenate(outputs, axis=0),
                     np.concatenate(targets, axis=0)), f)
    print("Done")
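# Companion sketch (not in the original script): read the pickle that
# get_logits() writes and compute top-1 accuracy from the stored logits.
# Only the file-name pattern is taken from above; the helper itself is
# illustrative.
import pickle

def accuracy_from_logits(output="c10"):
    with open("logits_%s_pretrained.p" % output, "rb") as f:
        logits, labels = pickle.load(f)        # numpy arrays, as dumped above
    return (logits.argmax(axis=1) == labels).mean()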
def __init__(self, digit, max_steps=200, min_steps=50):
    self.digit = digit
    self.nD = 8  # number of directions
    # cell state 0: blank, 1: drawn black
    self.grid = np.asarray([[0] * self.GRID_SIZE] * self.GRID_SIZE, dtype=int)
    self.nA = len(TurtleActions)
    # row, col, direction, cell color (0 or 1)
    self.nS = self.GRID_SIZE * self.GRID_SIZE * self.nD * 2
    self.action_space = spaces.Discrete(self.nA)
    self.observation_space = spaces.Box(low=0, high=1,
                                        shape=(self.GRID_SIZE, self.GRID_SIZE, 1))
    self.row = self.col = self.direction = 0
    # mnist classifier
    model_raw, _, _ = selector.select('mnist', cuda=False)
    self.mnist_model = model_raw
    self.preprocess = transforms.Compose([
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    self.max_steps = max_steps
    self.min_steps = min_steps
    self.step_count = 0
    self.seed()
    self.reset()
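# Hypothetical helper, not part of the original class: score the drawn grid
# with the stored MNIST classifier. Assumes GRID_SIZE is 28 and that
# self.preprocess expects a CHW float tensor, as the Normalize transform
# above suggests.
def classify_grid(self):
    img = torch.from_numpy(self.grid).float().unsqueeze(0)  # 1 x H x W
    img = self.preprocess(img)                               # MNIST normalization
    with torch.no_grad():
        logits = self.mnist_model(img.unsqueeze(0))          # 1 x 10
    return logits.softmax(dim=1)[0, self.digit].item()       # P(target digit)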
misc.ensure_dir(args.logdir)
args.model_root = misc.expand_user(args.model_root)
args.data_root = misc.expand_user(args.data_root)
args.input_size = 299 if 'inception' in args.type else args.input_size
assert args.quant_method in ['linear', 'minmax', 'log', 'tanh']
print("=================FLAGS==================")
for k, v in args.__dict__.items():
    print('{}: {}'.format(k, v))
print("========================================")
assert torch.cuda.is_available(), 'no cuda'
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

# load model and dataset fetcher
model_raw, ds_fetcher, is_imagenet = selector.select(
    args.type, model_root=args.model_root)
model_quant, ds_fetcher, is_imagenet = selector.select(
    args.type, model_root=args.model_root)
args.ngpu = args.ngpu if is_imagenet else 1

# Load dataset
val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False,
                    input_size=args.input_size)
val_ds_quant = ds_fetcher(args.batch_size, data_root=args.data_root,
                          train=False, input_size=args.input_size)

# quantize parameters
misc.ensure_dir(args.logdir)
args.model_root = misc.expand_user(args.model_root)
args.data_root = misc.expand_user(args.data_root)
args.input_size = 299 if 'inception' in args.type else args.input_size
assert args.quant_method in ['linear', 'minmax', 'log', 'tanh', 'scale']
print("=================FLAGS==================")
for k, v in args.__dict__.items():
    print('{}: {}'.format(k, v))
print("========================================")
assert torch.cuda.is_available(), 'no cuda'

# load model and dataset fetcher
if args.use_model_zoo:
    args.model_root = os.path.expanduser(args.model_root)
    model, ds_fetcher, is_imagenet = selector.select(
        model_name=args.type, model_root=args.model_root)
    args.ngpu = args.ngpu if is_imagenet else 1
else:
    args.model_root = '~/pytorch-mobilenet-v2/mobilenetv2_718.pth'
    args.type = "MobileNetV2"
    model, ds_fetcher = selector.find(model_name=args.type,
                                      model_root=args.model_root,
                                      net_root=args.net_root)
q_model = copy.deepcopy(model)

# replace bn layer
# if args.replace_bn:
#     quant.replace_bn(model)

# map bn to conv
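# The "map bn to conv" note above refers to folding a BatchNorm2d into the
# preceding Conv2d so inference needs only the conv. A standard folding
# sketch (illustrative; not the repo's quant code):
import torch

def fold_bn_into_conv(conv, bn):
    std = (bn.running_var + bn.eps).sqrt()
    scale = bn.weight / std
    conv.weight.data.mul_(scale.reshape(-1, 1, 1, 1))
    bias = conv.bias.data if conv.bias is not None else torch.zeros_like(bn.running_mean)
    conv.bias = torch.nn.Parameter((bias - bn.running_mean) * scale + bn.bias)
    return conv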
def main():
    # load model and dataset fetcher
    model_raw, ds_fetcher, is_imagenet = selector.select(
        args.type, model_root=args.model_root)
    args.ngpu = args.ngpu if is_imagenet else 1

    # get valid layers
    valid_ind = []
    layer_type_list = []
    for i, layer in enumerate(model_raw.modules()):
        if type(layer) in valid_layer_types:
            valid_ind.append(i)
            layer_type_list.append(type(layer))

    # get training dataset and validation dataset
    train_ds = ds_fetcher(args.batch_size, data_root=args.data_root, val=False,
                          input_size=args.input_size)
    val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False,
                        input_size=args.input_size)

    # eval raw model
    if not is_imagenet:
        acc1_train, acc5_train, loss_train = eval_and_print(
            model_raw, train_ds, is_imagenet, is_train=True, prefix_str="Raw")
        acc1_val, acc5_val, loss_val = eval_and_print(
            model_raw, val_ds, is_imagenet, is_train=False, prefix_str="Raw")

    # pruning, stage by stage
    for stage in range(args.stage):
        # get pruning ratios
        if args.ratios is not None:
            # the actual ratio at this stage
            ratios = [math.pow(r, (stage + 1.0) / args.stage)
                      for r in eval(args.ratios)]
        elif args.fix_ratio is not None:
            ratios = [math.pow(args.fix_ratio, (stage + 1.0) / args.stage)] * len(valid_ind)
        else:
            raise NotImplementedError

        # get weight importance
        if args.mode == 'normal':
            weight_importance = get_importance(importance_type='normal')
        elif args.mode == 'hessian':
            weight_importance = get_importance(importance_type='hessian',
                                               t=args.temperature)
            weight_importance_id = get_importance(importance_type='normal')
            for ix in weight_importance:
                weight_importance[ix] = weight_importance[ix] \
                    + args.mu * weight_importance_id[ix]
        elif args.mode == 'gradient':
            weight_importance = get_importance(importance_type='gradient',
                                               t=args.temperature)
        elif args.mode == 'KL':
            weight_importance = get_importance(importance_type='KL',
                                               t=args.temperature)

        # get weight hessian
        if args.type in ['mnist', 'cifar10'] and args.mode != 'KL' and args.ha > 0:
            weight_hessian = get_importance(importance_type='hessian',
                                            t=args.temperature)
            weight_hessian_id = get_importance(importance_type='normal')
            for ix in weight_hessian:
                weight_hessian[ix] = weight_hessian[ix] \
                    + args.mu * weight_hessian_id[ix]
        else:
            # TODO: delete this fallback once the hessian for cifar100 and
            # alexnet has been computed
            weight_hessian = get_importance(importance_type='normal')

        # prune
        mask_list, compression_ratio = prune(model_raw, weight_importance,
                                             weight_hessian, valid_ind, ratios,
                                             is_imagenet)
        print("Pruning stage {}, compression ratio {:.4f}".format(
            stage + 1, compression_ratio))
        if not is_imagenet:
            acc1_train, acc5_train, loss_train = eval_and_print(
                model_raw, train_ds, is_imagenet, is_train=True,
                prefix_str="Prune stage {}".format(stage + 1))
            acc1_val, acc5_val, loss_val = eval_and_print(
                model_raw, val_ds, is_imagenet, is_train=False,
                prefix_str="Prune stage {}".format(stage + 1))

        # and finetune
        if args.prune_finetune:
            retrain(model_raw, train_ds, val_ds, valid_ind, mask_list,
                    is_imagenet, is_retrain=False)
            if not is_imagenet:
                acc1_train, acc5_train, loss_train = eval_and_print(
                    model_raw, train_ds, is_imagenet, is_train=True,
                    prefix_str="Prune stage {}".format(stage + 1))
                acc1_val, acc5_val, loss_val = eval_and_print(
                    model_raw, val_ds, is_imagenet, is_train=False,
                    prefix_str="Prune stage {}".format(stage + 1))
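# Hedged sketch of the per-layer masking step that prune() above performs
# (the real routine lives elsewhere in the repo; the helper name and the
# reading of `ratio` as "fraction of weights dropped" are assumptions):
def make_mask(importance, ratio):
    flat = importance.reshape(-1)
    k = int(ratio * flat.numel())  # number of weights to drop
    if k == 0:
        return torch.ones_like(importance)
    thresh = flat.kthvalue(k).values          # k-th smallest importance
    return (importance > thresh).float()      # 1 = keep, 0 = prune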
# for epoch in range(1, num_epochs + 1):
#     train(model, device, train_loader, optimizer, epoch)
#     test(model, device, test_loader)
#
# save_checkpoint({'epoch': num_epochs,
#                  'state_dict': model.state_dict(),
#                  'optim_dict': model.state_dict()}, is_best=False, checkpoint='./')
# load_model('./', 'last')

# since shuffle=True, this is a random sample of test data
# batch = next(iter(test_loader))
# images, labels = batch

path = os.getcwd()
print(path)
model, ds_fetcher, is_imagenet = selector.select('svhn', cuda=False, model_root=path)
ds_val = ds_fetcher(batch_size=10, train=False, val=True, data_root=path)
for idx, (images, labels) in enumerate(ds_val):
    images = Variable(torch.FloatTensor(images))
    # data = Variable(torch.FloatTensor(data)).cuda()
    output = model(images)
    print(idx)

# load_model('./', 'last')
size_of_batch = images.shape[0]
bg_len = round(0.9 * size_of_batch)
test_len = min(round(0.1 * size_of_batch), 10)
test_idx = []
# test_idx = [[] for _ in range(test_len)]
count = 0
try:
    with open('result.pkl', 'rb') as f:
        result = pickle.load(f)
except (EOFError, FileNotFoundError):
    result = pd.DataFrame()

for typ in types:
    # If model_root is specified, assume the user wants a self-trained model
    if args.model_root:
        # Load custom keyword arguments for the model
        mk = model_kwargs[typ] if typ in model_kwargs else {}
        # Load model and dataset fetcher
        model_raw, ds_fetcher, is_imagenet = selector.select(
            typ, model_root=args.model_root, **mk)
        # Check that the number of elements in each setting is correct
        bits_len_match(model_raw, model_bits[typ]['param_bits'], 'param')
        bits_len_match(model_raw, model_bits[typ]['batch_norm_bits'], 'batch_norm')
        bits_len_match(model_raw, model_bits[typ]['layer_output_bits'], 'layer_output')
        # Load dataset
        val_ds = ds_fetcher(args.batch_size, data_root=args.data_root,
                            train=False, input_size=args.input_size)
        result_filename = 'result'
def main():
    # load model and dataset fetcher
    if args.type == 'synthetic':
        model_raw, ds_fetcher, is_imagenet = selector.select(
            args.type, model_root=args.model_root, input_dims=args.input_dims,
            n_hidden=eval(args.n_hidden), output_dims=args.output_dims,
            dropout=args.dropout_rate)
    else:
        model_raw, ds_fetcher, is_imagenet = selector.select(
            args.type, model_root=args.model_root)
    args.ngpu = args.ngpu if is_imagenet else 1
    training_size = 60000 if args.type == 'mnist' else 50000

    # get valid layers
    valid_ind = []
    layer_type_list = []
    for i, layer in enumerate(model_raw.modules()):
        if type(layer) in valid_layer_types:
            valid_ind.append(i)
            layer_type_list.append(type(layer))

    metrics = np.zeros((6, args.number_of_models))
    for i in range(args.number_of_models):
        # get training dataset and validation dataset
        if args.type == 'synthetic':
            train_ds = ds_fetcher(args.batch_size, renew=True, save=True,
                                  name='train_' + str(i + args.starting_index),
                                  model=model_raw, size=args.training_size,
                                  input_dims=args.input_dims,
                                  n_hidden=args.n_hidden,
                                  output_dims=args.output_dims,
                                  input_std=args.input_std,
                                  noise_std=args.noise_std)
            val_ds = ds_fetcher(args.batch_size, renew=True, save=True,
                                name='val_' + str(i + args.starting_index),
                                model=model_raw, size=args.val_size,
                                input_dims=args.input_dims,
                                n_hidden=args.n_hidden,
                                output_dims=args.output_dims,
                                input_std=args.input_std,
                                noise_std=args.noise_std)
        else:
            if args.subsample_rate < 1.0:
                indices = np.random.choice(
                    training_size,
                    int(args.subsample_rate * training_size / args.batch_size) * args.batch_size,
                    replace=True)
                train_ds = ds_fetcher(args.batch_size, data_root=args.data_root,
                                      val=False, subsample=True, indices=indices,
                                      input_size=args.input_size)
            else:
                train_ds = ds_fetcher(args.batch_size, data_root=args.data_root,
                                      val=False, input_size=args.input_size)
                indices = np.arange(training_size)
            val_ds = ds_fetcher(args.batch_size, data_root=args.data_root,
                                train=False, input_size=args.input_size)

        # eval raw model
        if args.type == 'synthetic':
            loss_train = eval_and_print_regression(model_raw, train_ds,
                                                   is_train=True, prefix_str="Raw")
            loss_val = eval_and_print_regression(model_raw, val_ds,
                                                 is_train=False, prefix_str="Raw")
        else:
            acc1_train, acc5_train, loss_train = eval_and_print(
                model_raw, train_ds, is_imagenet, is_train=True, prefix_str="Raw")
            acc1_val, acc5_val, loss_val = eval_and_print(
                model_raw, val_ds, is_imagenet, is_train=False, prefix_str="Raw")

        # retrain model
        retrain(model_raw, train_ds, val_ds, valid_ind, is_imagenet)
        if args.type == 'synthetic':
            metrics[2, i] = eval_and_print_regression(
                model_raw, train_ds, is_train=True,
                prefix_str="Retrained {}".format(i + args.starting_index))
            metrics[5, i] = eval_and_print_regression(
                model_raw, val_ds, is_train=False,
                prefix_str="Retrained {}".format(i + args.starting_index))
        else:
            metrics[0, i], metrics[1, i], metrics[2, i] = eval_and_print(
                model_raw, train_ds, is_imagenet, is_train=True,
                prefix_str="Retrained {}".format(i + args.starting_index))
            metrics[3, i], metrics[4, i], metrics[5, i] = eval_and_print(
                model_raw, val_ds, is_imagenet, is_train=False,
                prefix_str="Retrained {}".format(i + args.starting_index))

        # save retrained model
        filename = args.type + "_model_" + str(i + args.starting_index) + ".pth.tar"
        pathname = args.save_root + args.type
        if args.subsample_rate < 1.0:
            pathname += "/ssr=" + str(int(args.subsample_rate * 1000))
        if args.type == 'synthetic':
            pathname += "_" + str(args.input_dims)
            for dims in eval(args.n_hidden):
                pathname += "_" + str(dims)
            pathname += "_" + str(args.output_dims)
        pathname_model = pathname + "/model"
        if not os.path.exists(pathname_model):
            os.makedirs(pathname_model)
        filepath = os.path.join(pathname_model, filename)
        with open(filepath, "wb") as f:
            if args.type == 'synthetic':
                torch.save({
                    'number': i,
                    'model_state_dict': model_raw.state_dict(),
                }, f)
            else:
                torch.save({
                    'number': i,
                    'subsample_rate': args.subsample_rate,
                    'ds_indices': indices,
                    'model_state_dict': model_raw.state_dict(),
                }, f)

        # compute importance and write to file
        weight_importance = get_all_one_importance(model_raw, valid_ind, is_imagenet)
        filename = args.type + "_normal_" + str(i + args.starting_index)
        if args.temperature > 1.0:
            filename += "_t=" + str(int(args.temperature))
        filename += ".pth"
        pathname_importances = pathname + "/importances"
        if not os.path.exists(pathname_importances):
            os.makedirs(pathname_importances)
        filepath = os.path.join(pathname_importances, filename)
        with open(filepath, "wb") as f:
            torch.save(weight_importance, f)

        if args.compute_gradient:
            # compute gradient importance and write to file
            if args.type == 'synthetic':
                ds_for_importance = ds_fetcher(
                    args.gbs, renew=False,
                    name='train_' + str(i + args.starting_index),
                    input_dims=args.input_dims, n_hidden=args.n_hidden,
                    output_dims=args.output_dims)
            else:
                ds_for_importance = ds_fetcher(
                    args.gbs, data_root=args.data_root, val=False,
                    subsample=True, indices=indices, input_size=args.input_size)
            weight_importance = get_gradient_importance(
                model_raw, ds_for_importance, valid_ind, is_imagenet)
            filename = args.type + "_gradient_" + str(i + args.starting_index)
            if args.temperature > 1.0:
                filename += "_t=" + str(int(args.temperature))
            filename += ".pth"
            filepath = os.path.join(pathname_importances, filename)
            with open(filepath, "wb") as f:
                torch.save(weight_importance, f)

        if args.compute_hessian:
            # compute hessian importance and write to file
            if args.type == 'synthetic':
                ds_for_hessian = ds_fetcher(
                    args.hbs, renew=False,
                    name='train_' + str(i + args.starting_index),
                    input_dims=args.input_dims, n_hidden=args.n_hidden,
                    output_dims=args.output_dims)
            else:
                ds_for_hessian = ds_fetcher(
                    args.hbs, data_root=args.data_root, val=False,
                    subsample=True, indices=indices, input_size=args.input_size)
            weight_importance = get_hessian_importance(
                model_raw, ds_for_hessian, valid_ind, is_imagenet)
            filename = args.type + "_hessian_" + str(i + args.starting_index)
            if args.temperature > 1.0:
                filename += "_t=" + str(int(args.temperature))
            filename += ".pth"
            filepath = os.path.join(pathname_importances, filename)
            with open(filepath, "wb") as f:
                torch.save(weight_importance, f)

    perf_inf = ""
    if args.type == 'synthetic':
        perf_inf += "After retraining, type={}, training loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[2]), np.std(metrics[2]))
        perf_inf += "After retraining, type={}, validation loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[5]), np.std(metrics[5]))
    else:
        perf_inf += "After retraining, type={}, training acc1={:.4f}+-{:.4f}, acc5={:.4f}+-{:.4f}, loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[0]), np.std(metrics[0]),
            np.mean(metrics[1]), np.std(metrics[1]),
            np.mean(metrics[2]), np.std(metrics[2]))
        perf_inf += "After retraining, type={}, validation acc1={:.4f}+-{:.4f}, acc5={:.4f}+-{:.4f}, loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[3]), np.std(metrics[3]),
            np.mean(metrics[4]), np.std(metrics[4]),
            np.mean(metrics[5]), np.std(metrics[5]))
    print(perf_inf)
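# Plausible sketch of get_all_one_importance, which the loop above saves as
# the "normal" importance (assumption: the real helper is defined elsewhere
# in the repo). It assigns unit importance to every weight of the selected
# layers, keyed by module index as in valid_ind.
def all_one_importance_sketch(model, valid_ind):
    layers = list(model.modules())
    return {i: torch.ones_like(layers[i].weight.data) for i in valid_ind}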
def main():
    # load model and dataset fetcher
    model_raw, ds_fetcher, is_imagenet = selector.select(
        args.type, model_root=args.model_root)
    args.ngpu = args.ngpu if is_imagenet else 1

    # get valid layers
    valid_ind = []
    layer_type_list = []
    for i, layer in enumerate(model_raw.modules()):
        if type(layer) in valid_layer_types:
            valid_ind.append(i)
            layer_type_list.append(type(layer))

    # get training dataset and validation dataset
    train_ds = ds_fetcher(args.batch_size, data_root=args.data_root, val=False,
                          input_size=args.input_size)
    val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False,
                        input_size=args.input_size)

    # eval raw model
    if not is_imagenet:
        acc1_train, acc5_train, loss_train = eval_and_print(
            model_raw, train_ds, is_imagenet, is_train=True, prefix_str="Raw")
        acc1_val, acc5_val, loss_val = eval_and_print(
            model_raw, val_ds, is_imagenet, is_train=False, prefix_str="Raw")

    # get quantize ratios
    if args.bits is not None:
        # the actual number of clusters per layer
        clusters = [int(math.pow(2, r)) for r in eval(args.bits)]
    elif args.fix_bit is not None:
        clusters = [int(math.pow(2, args.fix_bit))] * len(valid_ind)
    else:
        raise NotImplementedError

    # get weight importance
    if args.mode == 'normal':
        weight_importance = get_importance(importance_type='normal')
    elif args.mode == 'hessian':
        weight_importance = get_importance(importance_type='hessian',
                                           t=args.temperature)
        weight_importance_id = get_importance(importance_type='normal')
        for ix in weight_importance:
            weight_importance[ix] = weight_importance[ix] \
                + args.mu * weight_importance_id[ix]
    elif args.mode == 'gradient':
        weight_importance = get_importance(importance_type='gradient',
                                           t=args.temperature)
    elif args.mode == 'KL':
        weight_importance = get_importance(importance_type='KL',
                                           t=args.temperature)

    if args.type in ['mnist', 'cifar10'] and args.mode != 'KL' and args.ha > 0.0:
        # get weight hessian
        weight_hessian = get_importance(importance_type='hessian',
                                        t=args.temperature)
        weight_hessian_id = get_importance(importance_type='normal')
        for ix in weight_hessian:
            weight_hessian[ix] = weight_hessian[ix] \
                + args.mu * weight_hessian_id[ix]
    else:
        # TODO: delete this fallback once the hessian for cifar100 and alexnet
        # has been computed
        weight_hessian = get_importance(importance_type='normal')

    # quantize
    compress_ratio, cl_list = quantize(model_raw, weight_importance,
                                       weight_hessian, valid_ind, clusters,
                                       is_imagenet)
    print("Quantization, ratio={:.4f}".format(compress_ratio))
    if not is_imagenet:
        acc1_train, acc5_train, loss_train = eval_and_print(
            model_raw, train_ds, is_imagenet, is_train=True,
            prefix_str="Quantization")
        acc1_val, acc5_val, loss_val = eval_and_print(
            model_raw, val_ds, is_imagenet, is_train=False,
            prefix_str="Quantization")

    if args.quant_finetune:
        finetune(model_raw, cl_list, train_ds, val_ds, valid_ind, is_imagenet)
        print("Quantization and finetune, ratio={:.4f}".format(compress_ratio))
        if not is_imagenet:
            acc1_train, acc5_train, loss_train = eval_and_print(
                model_raw, train_ds, is_imagenet, is_train=True,
                prefix_str="Quantization and finetune")
            acc1_val, acc5_val, loss_val = eval_and_print(
                model_raw, val_ds, is_imagenet, is_train=False,
                prefix_str="Quantization and finetune")
import numpy as np
import torch                      # needed for torch.device below
from utee import selector         # selector import, as in the other scripts
# from pytorch_hessian_test import hessian_diagonal, hessian, total_derivative
# from tensordot import tensordot_pytorch, contraction_pytorch
from utils.hessian_utils import *
from itertools import product
import datetime

# NOTE: Even with cuda=False the selector still tried to map the variables to
# GPUs. I had to change code in mnist/model.py to force mapping to CPU.
cuda = torch.device('cuda')
model_raw, ds_fetcher, is_imagenet = selector.select('cifar10', cuda=True)
# ps = list(model_raw.parameters())

batch_size = 15  # batch_size = 13 caused CUDA OOM on a K80
ds_val = ds_fetcher(batch_size=batch_size, train=False, val=True)
for idx, (data, target) in enumerate(ds_val):
    print(idx)
    z = data.to(device=cuda)
    target = target.to(device=cuda)
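# The hessian_utils import above suggests second-order computations on these
# batches. A minimal hedged sketch of a Hessian-vector product via double
# backward (the standard autograd trick; not necessarily what
# utils.hessian_utils does):
def hvp(loss, params, vec):
    # vec: a flat tensor with as many elements as all params combined
    grads = torch.autograd.grad(loss, params, create_graph=True)
    flat = torch.cat([g.reshape(-1) for g in grads])
    return torch.autograd.grad(flat @ vec, params, retain_graph=True)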
def main():
    parser = argparse.ArgumentParser(description='PyTorch SVHN Example')
    parser.add_argument('--type', default='cifar10',
                        help='|'.join(selector.known_models))
    parser.add_argument('--quant_method', default='linear',
                        help='linear|minmax|log|tanh')
    parser.add_argument('--batch_size', type=int, default=100,
                        help='input batch size for training (default: 100)')
    parser.add_argument('--gpu', default=None, help='index of gpus to use')
    parser.add_argument('--ngpu', type=int, default=8,
                        help='number of gpus to use')
    parser.add_argument('--seed', type=int, default=117,
                        help='random seed (default: 117)')
    parser.add_argument('--model_root', default='~/.torch/models/',
                        help='folder to save the model')
    parser.add_argument('--data_root', default='/data/public_dataset/pytorch/',
                        help='dataset root folder')
    parser.add_argument('--logdir', default='log/default',
                        help='folder to save the log')
    parser.add_argument('--input_size', type=int, default=224,
                        help='input size of image')
    parser.add_argument('--n_sample', type=int, default=20,
                        help='number of samples to infer the scaling factor')
    parser.add_argument('--param_bits', type=int, default=8,
                        help='bit-width for parameters')
    parser.add_argument('--bn_bits', type=int, default=32,
                        help='bit-width for running mean and std')
    parser.add_argument('--fwd_bits', type=int, default=8,
                        help='bit-width for layer output')
    parser.add_argument('--overflow_rate', type=float, default=0.0,
                        help='overflow rate')
    args = parser.parse_args()

    args.gpu = misc.auto_select_gpu(utility_bound=0, num_gpu=args.ngpu,
                                    selected_gpus=args.gpu)
    args.ngpu = len(args.gpu)
    misc.ensure_dir(args.logdir)
    args.model_root = misc.expand_user(args.model_root)
    args.data_root = misc.expand_user(args.data_root)
    args.input_size = 299 if 'inception' in args.type else args.input_size
    assert args.quant_method in ['linear', 'minmax', 'log', 'tanh']
    print("=================FLAGS==================")
    for k, v in args.__dict__.items():
        print('{}: {}'.format(k, v))
    print("========================================")
    assert torch.cuda.is_available(), 'no cuda'
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # load model and dataset fetcher
    model_raw, ds_fetcher, is_imagenet = selector.select(
        args.type, model_root=args.model_root)
    args.ngpu = args.ngpu if is_imagenet else 1

    # quantize parameters
    if args.param_bits < 32:
        state_dict = model_raw.state_dict()
        state_dict_quant = OrderedDict()
        sf_dict = OrderedDict()
        for k, v in state_dict.items():
            if 'running' in k:
                if args.bn_bits >= 32:
                    print("Ignoring {}".format(k))
                    state_dict_quant[k] = v
                    continue
                else:
                    bits = args.bn_bits
            else:
                bits = args.param_bits
            if args.quant_method == 'linear':
                sf = bits - 1. - quant.compute_integral_part(
                    v, overflow_rate=args.overflow_rate)
                v_quant = quant.linear_quantize(v, sf, bits=bits)
            elif args.quant_method == 'log':
                v_quant = quant.log_minmax_quantize(v, bits=bits)
            elif args.quant_method == 'minmax':
                v_quant = quant.min_max_quantize(v, bits=bits)
            else:
                v_quant = quant.tanh_quantize(v, bits=bits)
            state_dict_quant[k] = v_quant
            print(k, bits)
        model_raw.load_state_dict(state_dict_quant)

    # quantize forward activation
    if args.fwd_bits < 32:
        model_raw = quant.duplicate_model_with_quant(
            model_raw, bits=args.fwd_bits, overflow_rate=args.overflow_rate,
            counter=args.n_sample, type=args.quant_method)
        print(model_raw)
        val_ds_tmp = ds_fetcher(10, data_root=args.data_root, train=False,
                                input_size=args.input_size)
        misc.eval_model(model_raw, val_ds_tmp, ngpu=1, n_sample=args.n_sample,
                        is_imagenet=is_imagenet)

    # eval model
    val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False,
                        input_size=args.input_size)
    acc1, acc5 = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu,
                                 is_imagenet=is_imagenet)

    # print sf
    print(model_raw)
    res_str = "type={}, quant_method={}, param_bits={}, bn_bits={}, fwd_bits={}, overflow_rate={}, acc1={:.4f}, acc5={:.4f}".format(
        args.type, args.quant_method, args.param_bits, args.bn_bits,
        args.fwd_bits, args.overflow_rate, acc1, acc5)
    print(res_str)
    with open('acc1_acc5.txt', 'a') as f:
        f.write(res_str + '\n')
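# Self-contained sketch of the fixed-point scheme the 'linear' branch above
# uses: sf fractional bits give a step of 2**(-sf), and `bits` signed levels
# bound the range. This approximates quant.linear_quantize for illustration;
# it is not the library's exact code.
import math
import torch

def linear_quantize_sketch(v, sf, bits):
    delta = math.pow(2.0, -sf)        # quantization step
    bound = math.pow(2.0, bits - 1)   # levels on each side of zero
    rounded = torch.floor(v / delta + 0.5)
    return torch.clamp(rounded, -bound, bound - 1) * delta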
def main():
    # load model and dataset fetcher
    model_raw, ds_fetcher, is_imagenet = selector.select(
        args.type, model_root=args.model_root)
    args.ngpu = args.ngpu if is_imagenet else 1
    training_size = 60000 if args.type == 'mnist' else 50000

    # get valid layers
    valid_ind = []
    layer_type_list = []
    for i, layer in enumerate(model_raw.modules()):
        if type(layer) in valid_layer_types:
            valid_ind.append(i)
            layer_type_list.append(type(layer))

    # get the dataset used for computing importance
    if args.mode == 'hessian' and args.hessian_ssr < 1.0:
        indices = np.random.choice(
            training_size,
            int(args.hessian_ssr * training_size / args.batch_size) * args.batch_size,
            replace=True)
        ds_for_importance = ds_fetcher(args.batch_size, data_root=args.data_root,
                                       val=False, subsample=True, indices=indices,
                                       input_size=args.input_size)
    elif args.type not in ['mnist', 'cifar10', 'cifar100']:
        ds_for_importance = ds_fetcher(args.batch_size, data_root=args.data_root,
                                       val=True, input_size=args.input_size)
    elif args.mode == 'KL':
        ds_for_importance = ds_fetcher(args.batch_size, data_root=args.data_root,
                                       train=False, input_size=args.input_size)
    else:
        ds_for_importance = ds_fetcher(args.batch_size, data_root=args.data_root,
                                       val=False, input_size=args.input_size)

    # get weight importance
    if args.mode == 'normal':
        weight_importance = get_all_one_importance(model_raw, valid_ind,
                                                   is_imagenet)
    elif args.mode == 'hessian':
        weight_importance = get_hessian_importance(model_raw, ds_for_importance,
                                                   valid_ind, is_imagenet)
    elif args.mode == 'gradient':
        weight_importance = get_gradient_importance(model_raw, ds_for_importance,
                                                    valid_ind, is_imagenet)
    elif args.mode == 'KL':
        weight_importance = get_KL_importance(model_raw, ds_for_importance,
                                              valid_ind, is_imagenet)
    else:
        raise NotImplementedError

    # write to file
    filename = args.type + "_" + args.mode
    # if args.loss != 'cross_entropy':
    #     filename += "_" + args.loss
    if args.temperature > 1.0:
        filename += "_t=" + str(int(args.temperature))
    filename += ".pth"
    pathname = args.save_root + args.type
    if not os.path.exists(pathname):
        os.mkdir(pathname)
    filepath = os.path.join(pathname, filename)
    with open(filepath, "wb") as f:
        torch.save(weight_importance, f)
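# Hedged sketch of a gradient-based importance in the spirit of
# get_gradient_importance above (the real routine is defined elsewhere in the
# repo; the squared-gradient form and the `criterion` argument are
# assumptions): accumulate squared weight gradients over the importance
# dataset, keyed by module index as in valid_ind.
def gradient_importance_sketch(model, ds, valid_ind, criterion):
    layers = list(model.modules())
    imp = {i: torch.zeros_like(layers[i].weight.data) for i in valid_ind}
    model.eval()
    for data, target in ds:
        model.zero_grad()
        criterion(model(data), target).backward()
        for i in valid_ind:
            imp[i] += layers[i].weight.grad.data ** 2
    return imp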
def main():
    # load model and dataset fetcher
    if args.type == 'synthetic':
        model_raw, ds_fetcher, is_imagenet = selector.select(
            args.type, model_root=args.model_root, input_dims=args.input_dims,
            n_hidden=eval(args.n_hidden), output_dims=args.output_dims,
            dropout=args.dropout_rate)
    else:
        model_raw, ds_fetcher, is_imagenet = selector.select(
            args.type, model_root=args.model_root)
    args.ngpu = args.ngpu if is_imagenet else 1

    # get valid layers
    valid_ind = []
    layer_type_list = []
    for i, layer in enumerate(model_raw.modules()):
        if type(layer) in valid_layer_types:
            valid_ind.append(i)
            layer_type_list.append(type(layer))

    # get quantize ratios
    if args.bits is not None:
        # the actual number of clusters per layer
        clusters = [int(math.pow(2, r)) for r in eval(args.bits)]
    elif args.fix_bit is not None:
        clusters = [int(math.pow(2, args.fix_bit))] * len(valid_ind)
    else:
        raise NotImplementedError

    metrics = np.zeros((19, args.number_of_models))
    # iterate over all retrained models
    for i in range(args.number_of_models):
        # load retrained model
        filename = args.type + "_model_" + str(i) + ".pth.tar"
        pathname = args.save_root + args.type
        if args.subsample_rate < 1.0:
            pathname += "/ssr=" + str(int(args.subsample_rate * 1000))
        if args.type == 'synthetic':
            pathname += "_" + str(args.input_dims)
            for dims in eval(args.n_hidden):
                pathname += "_" + str(dims)
            pathname += "_" + str(args.output_dims)
        pathname += "/model"
        filepath = os.path.join(pathname, filename)
        assert os.path.isfile(filepath), "Cannot find model at " + filepath
        with open(filepath, "rb") as f:
            print("Loading model parameters from " + filepath)
            checkpoint = torch.load(f)
            model_raw.load_state_dict(checkpoint['model_state_dict'])
            if args.type != 'synthetic':
                ds_indices = checkpoint['ds_indices']

        # get training dataset and validation dataset
        if args.type == 'synthetic':
            train_ds = ds_fetcher(args.batch_size, renew=False,
                                  name='train_' + str(i),
                                  input_dims=args.input_dims,
                                  n_hidden=args.n_hidden,
                                  output_dims=args.output_dims)
            val_ds = ds_fetcher(args.batch_size, renew=False,
                                name='val_' + str(i),
                                input_dims=args.input_dims,
                                n_hidden=args.n_hidden,
                                output_dims=args.output_dims)
        else:
            train_ds = ds_fetcher(args.batch_size, data_root=args.data_root,
                                  val=False, subsample=True, indices=ds_indices,
                                  input_size=args.input_size)
            val_ds = ds_fetcher(args.batch_size, data_root=args.data_root,
                                train=False, input_size=args.input_size)

        # get weight importance
        if args.mode == 'normal':
            weight_importance = get_importance('normal', i)
        elif args.mode == 'hessian':
            weight_importance = get_importance('hessian', i, t=args.temperature)
            weight_importance_id = get_importance('normal', i)
            for ix in weight_importance:
                weight_importance[ix] = weight_importance[ix] \
                    + args.mu * weight_importance_id[ix]
        elif args.mode == 'gradient':
            weight_importance = get_importance('gradient', i, t=args.temperature)
            weight_importance_id = get_importance('normal', i)
            for ix in weight_importance:
                weight_importance[ix] = weight_importance[ix] \
                    + args.mu * weight_importance_id[ix]

        # eval raw model
        if args.type == 'synthetic':
            metrics[3, i] = eval_and_print_regression(
                model_raw, train_ds, is_train=True,
                prefix_str="Retrained model number %d" % i)
            metrics[6, i] = eval_and_print_regression(
                model_raw, val_ds, is_train=False,
                prefix_str="Retrained model number %d" % i)
        else:
            metrics[1, i], metrics[2, i], metrics[3, i] = eval_and_print(
                model_raw, train_ds, is_imagenet, is_train=True,
                prefix_str="Retrained model number %d" % i)
            metrics[4, i], metrics[5, i], metrics[6, i] = eval_and_print(
                model_raw, val_ds, is_imagenet, is_train=False,
                prefix_str="Retrained model number %d" % i)

        # quantize
        metrics[0, i], cl_list = quantize(model_raw, weight_importance,
                                          valid_ind, clusters, is_imagenet)
        # print("Quantization, ratio={:.4f}".format(metrics[0, i]))
        if args.type == 'synthetic':
            metrics[9, i] = eval_and_print_regression(
                model_raw, train_ds, is_train=True,
                prefix_str="After quantization number %d" % i)
            metrics[12, i] = eval_and_print_regression(
                model_raw, val_ds, is_train=False,
                prefix_str="After quantization number %d" % i)
        else:
            metrics[7, i], metrics[8, i], metrics[9, i] = eval_and_print(
                model_raw, train_ds, is_imagenet, is_train=True,
                prefix_str="After quantization number %d" % i)
            metrics[10, i], metrics[11, i], metrics[12, i] = eval_and_print(
                model_raw, val_ds, is_imagenet, is_train=False,
                prefix_str="After quantization number %d" % i)

        if args.quant_finetune:
            finetune(model_raw, cl_list, train_ds, val_ds, valid_ind, is_imagenet)
            # print("Quantization and finetune, ratio={:.4f}".format(metrics[0, i]))
            if args.type == 'synthetic':
                metrics[15, i] = eval_and_print_regression(
                    model_raw, train_ds, is_train=True,
                    prefix_str="After finetune number %d" % i)
                metrics[18, i] = eval_and_print_regression(
                    model_raw, val_ds, is_train=False,
                    prefix_str="After finetune number %d" % i)
            else:
                metrics[13, i], metrics[14, i], metrics[15, i] = eval_and_print(
                    model_raw, train_ds, is_imagenet, is_train=True,
                    prefix_str="After finetune number %d" % i)
                metrics[16, i], metrics[17, i], metrics[18, i] = eval_and_print(
                    model_raw, val_ds, is_imagenet, is_train=False,
                    prefix_str="After finetune number %d" % i)

    # print average performance information
    perf_inf = "\n"
    for arg in sys.argv:
        perf_inf += arg + " "
    perf_inf += "\n\n"
    if args.type == 'synthetic':
        perf_inf += "Before quantization, type={}, training loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[3]), np.std(metrics[3]))
        perf_inf += "Before quantization, type={}, validation loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[6]), np.std(metrics[6]))
        perf_inf += "Compression ratio = {:.4f}+-{:.4f}\n".format(
            np.mean(metrics[0]), np.std(metrics[0]))
        perf_inf += "After quantization, type={}, training loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[9]), np.std(metrics[9]))
        perf_inf += "After quantization, type={}, validation loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[12]), np.std(metrics[12]))
        if args.quant_finetune:
            perf_inf += "After finetune, type={}, training loss={:.6f}+-{:.6f}\n".format(
                args.type, np.mean(metrics[15]), np.std(metrics[15]))
            perf_inf += "After finetune, type={}, validation loss={:.6f}+-{:.6f}\n".format(
                args.type, np.mean(metrics[18]), np.std(metrics[18]))
    else:
        perf_inf += "Before quantization, type={}, training acc1={:.4f}+-{:.4f}, acc5={:.4f}+-{:.4f}, loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[1]), np.std(metrics[1]),
            np.mean(metrics[2]), np.std(metrics[2]),
            np.mean(metrics[3]), np.std(metrics[3]))
        perf_inf += "Before quantization, type={}, validation acc1={:.4f}+-{:.4f}, acc5={:.4f}+-{:.4f}, loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[4]), np.std(metrics[4]),
            np.mean(metrics[5]), np.std(metrics[5]),
            np.mean(metrics[6]), np.std(metrics[6]))
        perf_inf += "Compression ratio = {:.4f}+-{:.4f}\n".format(
            np.mean(metrics[0]), np.std(metrics[0]))
        perf_inf += "After quantization, type={}, training acc1={:.4f}+-{:.4f}, acc5={:.4f}+-{:.4f}, loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[7]), np.std(metrics[7]),
            np.mean(metrics[8]), np.std(metrics[8]),
            np.mean(metrics[9]), np.std(metrics[9]))
        perf_inf += "After quantization, type={}, validation acc1={:.4f}+-{:.4f}, acc5={:.4f}+-{:.4f}, loss={:.6f}+-{:.6f}\n".format(
            args.type, np.mean(metrics[10]), np.std(metrics[10]),
            np.mean(metrics[11]), np.std(metrics[11]),
            np.mean(metrics[12]), np.std(metrics[12]))
        if args.quant_finetune:
            perf_inf += "After finetune, type={}, training acc1={:.4f}+-{:.4f}, acc5={:.4f}+-{:.4f}, loss={:.6f}+-{:.6f}\n".format(
                args.type, np.mean(metrics[13]), np.std(metrics[13]),
                np.mean(metrics[14]), np.std(metrics[14]),
                np.mean(metrics[15]), np.std(metrics[15]))
            perf_inf += "After finetune, type={}, validation acc1={:.4f}+-{:.4f}, acc5={:.4f}+-{:.4f}, loss={:.6f}+-{:.6f}\n".format(
                args.type, np.mean(metrics[16]), np.std(metrics[16]),
                np.mean(metrics[17]), np.std(metrics[17]),
                np.mean(metrics[18]), np.std(metrics[18]))
    print(perf_inf)

    if args.result_root is not None:
        filename = args.type + "_" + args.mode + "_" + args.result_name
        pathname = args.result_root
        if not os.path.exists(pathname):
            os.makedirs(pathname)
        filepath = os.path.join(pathname, filename)
        with open(filepath, "w") as f:
            f.write(perf_inf)
    impath = os.path.join(path, '{}_graph.png'.format(gtype))
    plt.savefig(impath)
    plt.pause(1)
    fig1.clf()
    # plt.clf()
    plt.close(fig1)
    return


is_cuda = torch.cuda.is_available()
path = os.getcwd()
print(path)
model_raw, ds_fetcher, is_imagenet = selector.select('svhn', cuda=is_cuda,
                                                     model_root=path)
ds_val = ds_fetcher(batch_size=10, train=False, val=True, data_root=path)
for idx, (data, target) in enumerate(ds_val):
    data = Variable(torch.FloatTensor(data))
    if is_cuda:
        data = data.cuda()  # no need to re-wrap; just move to the GPU
    output = model_raw(data)
    print(idx)

print(output.shape)
net_vals = collect_network_statistics(model_raw)
# plot_summary_graphs_layers(net_vals, 'D', 'Grads', './')
plot_graph(net_vals, None, 'D', './', None)
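# Plausible sketch of collect_network_statistics used above (hypothetical;
# the real helper is defined elsewhere): gather [min, mean, max] of every
# parameter tensor so plot_graph can chart them layer by layer.
def collect_network_statistics_sketch(model):
    stats = []
    for p in model.parameters():
        w = p.data.float()
        stats.append([w.min().item(), w.mean().item(), w.max().item()])
    return stats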
args.gpu = misc.auto_select_gpu(utility_bound=0, num_gpu=args.ngpu,
                                selected_gpus=args.gpu)
args.ngpu = len(args.gpu)
args.model_root = misc.expand_user(args.model_root)
args.data_root = misc.expand_user(args.data_root)
args.input_size = 299 if 'inception' in args.type else args.input_size
print("=================FLAGS==================")
for k, v in args.__dict__.items():
    print('{}: {}'.format(k, v))
print("========================================")
assert torch.cuda.is_available(), 'no cuda'
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

# load model and dataset fetcher
model_raw, ds_fetcher = selector.select(args.type, model_root=args.model_root)

# eval model
val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False,
                    input_size=args.input_size)
acc1, acc5 = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu)

# print sf
print(model_raw)
res_str = "type={}, acc1={:.4f}, acc5={:.4f}".format(args.type, acc1, acc5)
print(res_str)
with open('acc1_acc5.txt', 'a') as f:
    f.write(res_str + '\n')
import torch
from torch.autograd import Variable
from utee import selector

model_raw, ds_fetcher, is_imagenet = selector.select('mnist')
ds_val = ds_fetcher(batch_size=10, train=False, val=True)

count = 0
right_num = 0
for idx, (data, target) in enumerate(ds_val):
    data = Variable(torch.FloatTensor(data)).cuda()
    output = model_raw(data)
    index = 0
    for i in output:
        predict = torch.argmax(i)
        count = count + 1
        flag = 1 if int(predict) == target[index] else 0
        index = index + 1
        right_num = right_num + flag
print(f"Accuracy: {right_num / count}")
print("This is the master branch")
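# A more idiomatic, batched version of the accuracy loop above (a sketch with
# the same semantics: one argmax over the whole batch instead of a Python
# loop over rows):
correct = total = 0
for data, target in ds_val:
    with torch.no_grad():
        pred = model_raw(data.float().cuda()).argmax(dim=1).cpu()
    correct += (pred == target).sum().item()
    total += target.numel()
print(f"Accuracy: {correct / total:.4f}")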
misc.ensure_dir(args.logdir)
args.model_root = misc.expand_user(args.model_root)
args.data_root = misc.expand_user(args.data_root)
args.input_size = 299 if 'inception' in args.type else args.input_size
assert args.quant_method in ['linear', 'minmax', 'log', 'tanh']
print("=================FLAGS==================")
for k, v in args.__dict__.items():
    print('{}: {}'.format(k, v))
print("========================================")
assert torch.cuda.is_available(), 'no cuda'
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

# load model and dataset fetcher
model_raw, ds_fetcher, is_imagenet = selector.select(
    args.type, model_root=args.model_root)
args.ngpu = args.ngpu if is_imagenet else 1

# quantize parameters
if args.param_bits < 32:
    state_dict = model_raw.state_dict()
    state_dict_quant = OrderedDict()
    sf_dict = OrderedDict()
    for k, v in state_dict.items():
        if 'running' in k:
            if args.bn_bits >= 32:
                print("Ignoring {}".format(k))
                state_dict_quant[k] = v
                continue
            else:
                bits = args.bn_bits