torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

if args.sp:
    from sp_mbnet import sp_mbnet as mbnet
else:
    from mobilenetv1 import MobileNetV1 as mbnet

assert args.load
assert os.path.isfile(args.load)

checkpoint = torch.load(args.load)
model = mbnet(dataset=args.dataset, cfg=checkpoint['cfg'])
# load the weights; otherwise only the architecture is used
model.load_state_dict(checkpoint['state_dict'])
model.to(device)

train_loader, test_loader = \
    get_data_loader(dataset=args.dataset,
                    train_batch_size=args.batch_size,
                    test_batch_size=args.test_batch_size,
                    use_cuda=args.cuda)


def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
ch = logging.StreamHandler()
fh = logging.FileHandler(os.path.join(args.save, logging_file_path))
formatter = logging.Formatter('%(asctime)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
log.addHandler(fh)
log.addHandler(ch)
#########################################################

assert args.load
assert os.path.isfile(args.load)
log.info("=> loading checkpoint '{}'".format(args.load))
checkpoint = torch.load(args.load)
model = mbnet(dataset=args.dataset, cfg=checkpoint['cfg']).to(device)
model.load_state_dict(checkpoint['state_dict'])
log.info("=> loaded checkpoint '{}'".format(args.load))
del checkpoint


def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
# define loss function (criterion) and optimizer
num_classes = 1000

# Data loading code
train_loader, val_loader = \
    get_data_loader(args.data,
                    train_batch_size=args.batch_size,
                    test_batch_size=args.test_batch_size,
                    workers=args.workers)

## loading pretrained model ##
assert args.load
assert os.path.isfile(args.load)
print("=> loading checkpoint '{}'".format(args.load))
checkpoint = torch.load(args.load)
model = mbnet(cfg=checkpoint['cfg'])
total_flops = print_model_param_flops(model, 224, multiply_adds=False)
print(total_flops)
if args.use_cuda:
    model.cuda()

# select the plain model keys, excluding the auxiliary '.y' / '.v' and *_params entries
selected_model_keys = [k for k in model.state_dict().keys()
                       if not (k.endswith('.y') or k.endswith('.v')
                               or k.startswith('net_params')
                               or k.startswith('y_params')
                               or k.startswith('v_params'))]
saved_model_keys = checkpoint['state_dict']

# copy the saved weights over to the matching keys of the new model
from collections import OrderedDict
new_state_dict = OrderedDict()
if len(selected_model_keys) == len(saved_model_keys):
    for k0, k1 in zip(selected_model_keys, saved_model_keys):
        new_state_dict[k0] = checkpoint['state_dict'][k1]
def main():
    global best_prec1, log

    batch_size = args.batch_size * max(1, args.num_gpus)
    args.lr = args.lr * (batch_size // 256)
    print(batch_size, args.lr, args.num_gpus)

    num_classes = 1000
    num_training_samples = 1281167
    args.num_batches_per_epoch = num_training_samples // batch_size

    assert args.exp_name
    args.save = os.path.join(args.save, args.exp_name)
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    hyper_str = "run_{}_lr_{}_decay_{}_b_{}_gpu_{}".format(
        args.epochs, args.lr, args.lr_mode, batch_size, args.num_gpus)
    ## bn-based pruning base model ##
    if args.sr:
        hyper_str = "{}_sr_grow_{}_s_{}".format(hyper_str, args.m, args.s)
    ## using amc configuration ##
    elif args.amc:
        hyper_str = "{}_amc".format(hyper_str)
    elif args.sp:
        hyper_str = "{}_sp_base_{}".format(hyper_str, args.sp_cfg)
    else:
        hyper_str = "{}_grow_{}".format(hyper_str, args.m)

    args.model_save_path = \
        os.path.join(args.save, 'mbv1_{}.pth.tar'.format(hyper_str))

    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # Use CUDA
    use_cuda = torch.cuda.is_available()
    args.use_cuda = use_cuda

    # Random seed
    random.seed(0)
    torch.manual_seed(0)
    if use_cuda:
        torch.cuda.manual_seed_all(0)
        device = 'cuda'
        cudnn.benchmark = True
    else:
        device = 'cpu'
    if args.evaluate == 1:
        device = 'cuda:0'

    if args.sp:
        model = mbnet(default=args.sp_cfg)
    else:
        model = mbnet(amc=args.amc, m=args.m)
    print(model.cfg)
    cfg = model.cfg

    total_params = print_model_param_nums(model.cpu())
    total_flops = print_model_param_flops(model.cpu(), 224, multiply_adds=False)
    print(total_params, total_flops)

    if not args.distributed:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    if args.label_smoothing:
        criterion = CrossEntropyLabelSmooth(num_classes).cuda()
    else:
        criterion = nn.CrossEntropyLoss().cuda()

    ### split parameters: no weight decay on BN and bias parameters ###
    no_wd_params, wd_params = [], []
    for name, param in model.named_parameters():
        if param.requires_grad:
            if ".bn" in name or '.bias' in name:
                no_wd_params.append(param)
            else:
                wd_params.append(param)
    no_wd_params = nn.ParameterList(no_wd_params)
    wd_params = nn.ParameterList(wd_params)

    optimizer = torch.optim.SGD([
        {
            'params': no_wd_params,
            'weight_decay': 0.
        },
        {
            'params': wd_params,
            'weight_decay': args.weight_decay
        },
    ], args.lr, momentum=args.momentum, nesterov=True)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.model_save_path):
            print("=> loading checkpoint '{}'".format(args.model_save_path))
            checkpoint = torch.load(args.model_save_path)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.model_save_path, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.model_save_path))

    # Data loading code
    train_loader, val_loader = \
        get_data_loader(args.data,
                        train_batch_size=batch_size,
                        test_batch_size=32,
                        workers=args.workers)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        # adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'cfg': cfg,
            'sr': args.sr,
            'amc': args.amc,
            's': args.s,
            'args': args,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, args.model_save_path)
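# --------------------------------------------------------------------------
# Hedged sketch (an assumption, not part of the original scripts): the
# checkpoints saved above record args.sr and args.s, and the CIFAR
# fine-tuning code further below defines updateBN(), which adds the L1
# subgradient args.s * sign(gamma) to every BN scale factor. The ImageNet
# train() body is not shown in this excerpt; if it applies the same BN
# sparsity regularizer, a minimal per-batch step would look like the helper
# below (the names model, criterion, optimizer, and updateBN are assumed
# from the surrounding code).
def sparsity_train_step(model, criterion, optimizer, data, target, apply_sr=False):
    output = model(data)
    loss = criterion(output, target)
    optimizer.zero_grad()
    loss.backward()
    if apply_sr:
        updateBN()  # L1 subgradient on BN gammas, applied before the optimizer step
    optimizer.step()
    return loss.item()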
fh = logging.FileHandler(logging_file_path)
formatter = logging.Formatter('%(asctime)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
log.addHandler(fh)
log.addHandler(ch)
#########################################################

from dataloader import get_data_loader
train_loader, test_loader = \
    get_data_loader(dataset=args.dataset,
                    train_batch_size=args.batch_size,
                    test_batch_size=args.test_batch_size,
                    use_cuda=args.cuda)

model = mbnet(cfg=checkpoint['cfg'], dataset=args.dataset, dummy_layer=args.layer)
# load weights, otherwise, only the arch is used
print(model.cfg)

if not os.path.exists('config'):
    os.makedirs('config')
if args.layer != -1:
    pickle.dump(model.cfg,
                open('config/{}_{}.pkl'.format(args.dataset, str(args.rd)), 'wb'))
else:
    pickle.dump(model.cfg,
                open('config/{}_{}.pkl'.format(args.dataset, str(args.rd + 1)), 'wb'))

if not args.retrain:
log.setLevel(logging.INFO)
ch = logging.StreamHandler()
fh = logging.FileHandler(os.path.join(args.save, logging_file))
formatter = logging.Formatter('%(asctime)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
log.addHandler(fh)
log.addHandler(ch)
#########################################################

train_loader, test_loader = \
    get_data_loader(dataset=args.dataset,
                    train_batch_size=args.batch_size,
                    test_batch_size=args.test_batch_size,
                    use_cuda=args.cuda)

model = mbnet(dataset=args.dataset)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)


# additional subgradient descent on the sparsity-induced penalty term
def updateBN():
    for m in model.modules():
        if isinstance(m, MbBlock):
            m.bn2.weight.grad.data.add_(args.s * torch.sign(m.bn2.weight.data))  # L1
        elif isinstance(m, ConvBlock):
            m.bn.weight.grad.data.add_(args.s * torch.sign(m.bn.weight.data))  # L1


def train(epoch):
    model.train()
    # global history_score
log.info('model save path: {}'.format(model_save_path))
log.info('log save path: {}'.format(logging_file_path))

##### check existing models ##
# if os.path.isfile(model_save_path) and args.resume:
#     pre_check = torch.load(model_save_path)
#     if pre_check['epoch'] == args.epochs and pre_check['cfg'] == checkpoint['cfg']:
#         print('no need to run, load from {}'.format(model_save_path))
#         sys.exit(0)

from dataloader import get_data_loader
train_loader, test_loader = \
    get_data_loader(dataset=args.dataset,
                    train_batch_size=args.batch_size,
                    test_batch_size=args.test_batch_size,
                    use_cuda=args.cuda)

model = mbnet(cfg=checkpoint['cfg'], dataset=args.dataset)
# load the weights; otherwise only the architecture is used
if not args.retrain:
    model.load_state_dict(checkpoint['state_dict'])
if args.cuda:
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)


def train(epoch):
    model.train()
    avg_loss = 0.