# Imports inferred from usage below; the project-local module names
# (aux_funcs, network_architectures, model_funcs, data) are assumptions
# based on the af/arcs aliases used throughout.
import torch
import torch.nn as nn
import numpy as np

import aux_funcs as af
import network_architectures as arcs
import model_funcs
import data


def main():
    device = af.get_pytorch_device()

    # Build an iteratively prunable ResNet with per-round keep ratios and a
    # pruning batch size of 128.
    model, param = arcs.create_resnet_iterative(
        "networks/", "dense", "0", (True, [0.8, 0.75, 0.66, 0.6], 128), False)
    dataset = data.CIFAR10()

    optimizer, scheduler = af.get_full_optimizer(
        model,
        (param['learning_rate'], param['weight_decay'], param['momentum'], -1),
        ([4], [0.1]))

    train_params = dict(
        epochs=10,
        epoch_growth=[2, 4, 6],          # epochs at which the network grows
        epoch_prune=[1, 3, 5, 7, 8],     # epochs at which pruning is applied
        prune_batch_size=128,
        prune_type="0",
        reinit=False,
        min_ratio=[0.5, 0.4, 0.3, 0.2],  # floors for the iterative keep ratios
    )

    params, best_model = model_funcs.iter_training_0(
        model, dataset, train_params, optimizer, scheduler, device)
    params['epoch_prune'] = train_params['epoch_prune']

    # af.print_sparsity(best_model)
    print("number of flops: {}".format(
        af.calculate_flops(best_model, (3, 32, 32))))

    # Reset every pruning mask to all-ones so the second FLOP count reflects
    # the unpruned network.
    for layer in best_model.modules():
        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            layer.weight_mask = nn.Parameter(torch.ones_like(layer.weight_mask))

    print("number of flops no pruning: {}".format(
        af.calculate_flops(best_model, (3, 32, 32))))
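
# A small sparsity report in the spirit of the commented-out
# af.print_sparsity call above. This is a sketch: _mask_sparsity is a
# hypothetical helper, not part of the project API; it only assumes the
# weight_mask attribute that main() already manipulates.
def _mask_sparsity(model):
    total, zeros = 0, 0
    for layer in model.modules():
        if isinstance(layer, (nn.Conv2d, nn.Linear)) and hasattr(layer, 'weight_mask'):
            total += layer.weight_mask.numel()
            zeros += (layer.weight_mask == 0).sum().item()
    return zeros / max(total, 1)  # fraction of pruned (zeroed) weights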

def train_model(models_path, cr_params, device, num=0):
    net_type, mode, pruning, ics = cr_params
    model, params = arcs.create_resnet_iterative(
        models_path, net_type, mode, pruning, ics, False)
    dataset = af.get_dataset('cifar10')

    params['name'] = params['base_model'] + '_{}_{}'.format(net_type, mode)

    if model.prune:
        params['name'] += "_prune_{}".format([x * 100 for x in model.keep_ratio])
        print("prune: {}".format(model.keep_ratio))

    if mode == "0":
        params['epochs'] = 250
        params['milestones'] = [120, 160, 180]
        params['gammas'] = [0.1, 0.01, 0.01]
    elif mode == "1":
        params['epochs'] = 300
        params['milestones'] = [100, 150, 200]
        params['gammas'] = [0.1, 0.1, 0.1]

    if "full" in net_type:
        params['learning_rate'] = 0.1
    print("lr: {}".format(params['learning_rate']))

    opti_param = (params['learning_rate'], params['weight_decay'],
                  params['momentum'], -1)
    lr_schedule_params = (params['milestones'], params['gammas'])

    model.to(device)
    train_params = dict(
        epochs=params['epochs'],
        epoch_growth=[25, 50, 75],
        epoch_prune=[10, 35, 60, 85, 110, 135, 160],
        prune_batch_size=pruning[2],
        prune_type='2',  # 0: skip layer, 1: normal full, 2: iterative
        reinit=False,
        # Floors for the iterative keep ratios; not needed when layers are
        # skipped.
        min_ratio=[0.3, 0.1, 0.05, 0.05],
    )
    params['epoch_growth'] = train_params['epoch_growth']
    params['epoch_prune'] = train_params['epoch_prune']

    optimizer, scheduler = af.get_full_optimizer(model, opti_param,
                                                 lr_schedule_params)
    metrics, best_model = model.train_func(model, dataset, train_params,
                                           optimizer, scheduler, device)
    _link_metrics(params, metrics)

    af.print_sparsity(best_model)
    arcs.save_model(best_model, params, models_path, params['name'], epoch=-1)
    print("test acc: {}, last val: {}".format(params['test_top1_acc'],
                                              params['valid_top1_acc'][-1]))
    return best_model, params
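
# Illustrative call (a sketch; the tuple values mirror the configuration used
# in main() above, and the final element fills the ics slot unpacked by
# train_model):
#
#     device = af.get_pytorch_device()
#     cr_params = ("dense", "0", (True, [0.8, 0.75, 0.66, 0.6], 128), False)
#     best_model, params = train_model("networks/", cr_params, device)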

def train_sdn_model(models_path, device):
    # Create an untrained ResNet-56 SDN for CIFAR-10 and train it end to end.
    _, sdn = arcs.create_resnet56(models_path, 'cifar10', save_type='d')
    print('sdn name: {}'.format(sdn))

    print("Training model...")
    trained_model, model_params = arcs.load_model(models_path, sdn, 0)
    dataset = af.get_dataset(model_params['task'])

    lr = model_params['learning_rate']
    momentum = model_params['momentum']
    weight_decay = model_params['weight_decay']
    milestones = model_params['milestones']
    gammas = model_params['gammas']
    num_epochs = model_params['epochs']
    model_params['optimizer'] = 'SGD'

    opti_param = (lr, weight_decay, momentum, -1)
    lr_schedule_params = (milestones, gammas)
    optimizer, scheduler = af.get_full_optimizer(trained_model, opti_param,
                                                 lr_schedule_params)

    trained_model_name = sdn + '_training'
    print('Training: {}...'.format(trained_model_name))

    trained_model.to(device)
    metrics = trained_model.train_func(trained_model, dataset, num_epochs,
                                       optimizer, scheduler, device=device)

    model_params['train_top1_acc'] = metrics['train_top1_acc']
    model_params['test_top1_acc'] = metrics['test_top1_acc']
    model_params['train_top3_acc'] = metrics['train_top3_acc']
    model_params['test_top3_acc'] = metrics['test_top3_acc']
    model_params['epoch_times'] = metrics['epoch_times']
    model_params['lrs'] = metrics['lrs']

    total_training_time = sum(model_params['epoch_times'])
    model_params['total_time'] = total_training_time
    print('Training took {} seconds...'.format(total_training_time))

    arcs.save_model(trained_model, model_params, models_path,
                    trained_model_name, epoch=-1)
    return trained_model, dataset
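
# Illustrative call (sketch):
#
#     device = af.get_pytorch_device()
#     sdn_model, cifar10 = train_sdn_model('networks/', device)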

def train(models_path, untrained_models, sdn=False, ic_only_sdn=False,
          device='cpu', ds=False):
    print('Training models...')

    for base_model in untrained_models:
        trained_model, model_params = arcs.load_model(models_path, base_model, 0)
        dataset = af.get_dataset(model_params['task'])

        learning_rate = model_params['learning_rate']
        momentum = model_params['momentum']
        weight_decay = model_params['weight_decay']
        milestones = model_params['milestones']
        gammas = model_params['gammas']
        num_epochs = model_params['epochs']
        model_params['optimizer'] = 'SGD'

        if ic_only_sdn:
            # IC-only training: freeze the original weights and train only the
            # internal classifiers, with the IC-specific hyperparameters.
            learning_rate = model_params['ic_only']['learning_rate']
            num_epochs = model_params['ic_only']['epochs']
            milestones = model_params['ic_only']['milestones']
            gammas = model_params['ic_only']['gammas']
            model_params['optimizer'] = 'Adam'
            trained_model.ic_only = True
        else:
            trained_model.ic_only = False

        trained_model.ds = ds

        optimization_params = (learning_rate, weight_decay, momentum)
        lr_schedule_params = (milestones, gammas)

        if sdn:
            if ic_only_sdn:
                optimizer, scheduler = af.get_sdn_ic_only_optimizer(
                    trained_model, optimization_params, lr_schedule_params)
                trained_model_name = base_model + '_ic_only_ic{}'.format(
                    np.sum(model_params['add_ic']))
            else:
                optimizer, scheduler = af.get_full_optimizer(
                    trained_model, optimization_params, lr_schedule_params)
                trained_model_name = base_model + '_sdn_training_ic{}'.format(
                    np.sum(model_params['add_ic']))
        else:
            optimizer, scheduler = af.get_full_optimizer(
                trained_model, optimization_params, lr_schedule_params)
            trained_model_name = base_model

        if ds:
            trained_model_name += '_ds'

        print('Training: {}...'.format(trained_model_name))
        # Optionally wrap in nn.DataParallel here for multi-GPU training.
        trained_model.to(device)
        metrics = trained_model.train_func(trained_model, dataset, num_epochs,
                                           optimizer, scheduler, device=device)

        model_params['train_top1_acc'] = metrics['train_top1_acc']
        model_params['test_top1_acc'] = metrics['test_top1_acc']
        model_params['train_top5_acc'] = metrics['train_top5_acc']
        model_params['test_top5_acc'] = metrics['test_top5_acc']
        model_params['epoch_times'] = metrics['epoch_times']
        model_params['lrs'] = metrics['lrs']

        total_training_time = sum(model_params['epoch_times'])
        model_params['total_time'] = total_training_time
        print('Training took {} seconds...'.format(total_training_time))

        arcs.save_model(trained_model, model_params, models_path,
                        trained_model_name, epoch=-1)
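
# Assumed entry point (a sketch; the original listing does not show how these
# functions are invoked): run the short iterative-training demo in main().
if __name__ == '__main__':
    main()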