parser = argparse.ArgumentParser(description='PyTorch my data Training')
args = parse_opts()
max_epoch = args.max_epoch - args.distill_epoch
device = 'cuda' if torch.cuda.is_available() else 'cpu'

log = Logger('both', filename=os.path.join(args.log_dir, args.log_file + '_all'),
             level='debug', mode='both')
logger = log.logger.info
log_config(args, logger)
log_file = Logger('file', filename=os.path.join(args.log_dir, args.log_file),
                  level='debug', mode='file')
logger_file = log_file.logger.info

attr, attr_name = get_tasks(args)
criterion_CE, metrics = get_losses_metrics(attr, args.categorical_loss)

# Load dataset, net, evaluator, Saver
trainloader, testloader = get_data(args, attr, mean=[0.485, 0.456, 0.406],
                                   std=[0.229, 0.224, 0.225])
t_net, s_net, channel_t, channel_s, layer_t, layer_s, index, filter_list = \
    get_pair_model(args.size, frm='my', name_t=args.name_t, name_s=args.name_s,
                   load_BN=args.load_BN, logger=logger, bucket=args.bucket,
                   classifier=args.classifier)

if args.direct_connect:
    distill_net = AB_distill_Mobilenetl2MobilenetsNoConnect(
        t_net, s_net, args.batch_size, args.DTL, args.AB_loss_multiplier,
        args.DTL_loss_multiplier, channel_t, channel_s, layer_t, layer_s,
        criterion_CE, index, args.DTL_loss)
else:
    distill_net = AB_distill_Mobilenetl2Mobilenets(
        t_net, s_net, args.batch_size, args.DTL, args.AB_loss_multiplier,
        args.DTL_loss_multiplier, channel_t, channel_s, layer_t, layer_s,
        criterion_CE, args.stage1, args.DTL_loss)

if device == 'cuda':
    s_net = torch.nn.DataParallel(s_net).cuda()
    distill_net = torch.nn.DataParallel(distill_net).cuda()
    cudnn.benchmark = True

params_list = [{'params': filter(lambda p: id(p) not in filter_list, s_net.parameters())}]
if args.stage1 and not args.direct_connect:
from data.get_data import get_data

if __name__ == '__main__':
    print(get_data())
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    # torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    if config.deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.enabled = True
    else:
        torch.backends.cudnn.benchmark = True

    # get data with meta info
    if config.data_loader_type == 'torch':
        input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data(
            config.dataset, config.data_path, config.cutout_length,
            auto_augmentation=config.auto_augmentation)
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=config.batch_size,
                                                   shuffle=True,
                                                   num_workers=config.workers,
                                                   pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(valid_data,
                                                   batch_size=config.batch_size,
                                                   shuffle=False,
                                                   num_workers=config.workers,
                                                   pin_memory=True)
    elif config.data_loader_type == 'dali':
        input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data_dali(
            config.dataset, config.data_path, batch_size=config.batch_size,
            num_threads=config.workers)
        train_loader = train_data
        valid_loader = valid_data
    else:
        raise NotImplementedError

    if config.label_smoothing > 0:
        from utils import LabelSmoothLoss
        criterion = LabelSmoothLoss(smoothing=config.label_smoothing).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    use_aux = config.aux_weight > 0.
    if config.model_method == 'darts_NAS':
        if config.genotype is None:
            config.genotype = get_model.get_model(config.model_method, config.model_name)
        if 'imagenet' in config.dataset.lower():
            model = AugmentCNN_ImageNet(input_size, input_channels, config.init_channels,
                                        n_classes, config.layers, use_aux, config.genotype)
        else:
            model = AugmentCNN(input_size, input_channels, config.init_channels,
                               n_classes, config.layers, use_aux, config.genotype)
    elif config.model_method == 'my_model_collection':
        from models.my_searched_model import my_specialized
        if config.structure_path is None:
            _ = config.model_name.split(':')
            net_config_path = os.path.join(project_path, 'models', 'my_model_collection',
                                           _[0], _[1] + '.json')
        else:
            net_config_path = config.structure_path
        model = my_specialized(num_classes=n_classes, net_config=net_config_path,
                               dropout_rate=config.dropout_rate)
    else:
        model_fun = get_model.get_model(config.model_method, config.model_name)
        model = model_fun(num_classes=n_classes, dropout_rate=config.dropout_rate)

    # set bn
    model.set_bn_param(config.bn_momentum, config.bn_eps)
    # model init
    model.init_model(model_init=config.model_init)
    model.cuda()

    # model size
    total_ops, total_params = flops_counter.profile(
        model, [1, input_channels, input_size, input_size])
    logger.info("Model size = {:.3f} MB".format(total_params))
    logger.info("Model FLOPS with input {} = {:.3f} M".format(
        str([1, input_channels, input_size, input_size]), total_ops))
    total_ops, total_params = flops_counter.profile(model, [1, 3, 224, 224])
    logger.info("Model FLOPS with input [1,3,224,224] {:.3f} M".format(total_ops))

    model = nn.DataParallel(model).to(device)

    # weights optimizer
    if not config.no_decay_keys == 'None':
        keys = config.no_decay_keys.split('#')
        optimizer = torch.optim.SGD([
            {'params': model.module.get_parameters(keys, mode='exclude'),
             'weight_decay': config.weight_decay},
            {'params': model.module.get_parameters(keys, mode='include'),
             'weight_decay': 0},
        ], lr=config.lr, momentum=config.momentum)
    else:
        optimizer = torch.optim.SGD(model.parameters(), config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)

    best_top1 = 0.
    # training loop
    _size = get_iterator_length(train_loader)
    for epoch in range(config.epochs):
        lr_scheduler.step()

        if config.drop_path_prob > 0:
            drop_prob = config.drop_path_prob * epoch / config.epochs
            model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * _size
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
            logger.info("Current best Prec@1 = {:.4%}".format(best_top1))
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
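# Both the training loop above and the evaluation script below call a validate()
# helper that is defined elsewhere in the repository. Its only visible contract is
# that it takes (loader, model, criterion, epoch, cur_step) and returns top-1
# accuracy as a fraction (the callers print it with "{:.4%}"). The body below is a
# minimal sketch under those assumptions, not the project's actual implementation.
import torch

def validate(valid_loader, model, criterion, epoch, cur_step):
    """Illustrative stand-in: average top-1 accuracy over the validation loader."""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, targets in valid_loader:
            images = images.cuda(non_blocking=True)
            targets = targets.cuda(non_blocking=True)
            logits = model(images)
            # Some augment models return (logits, aux_logits) when the auxiliary head is on
            if isinstance(logits, tuple):
                logits = logits[0]
            correct += (logits.argmax(dim=1) == targets).sum().item()
            total += targets.size(0)
    # Returned as a fraction so it matches the "{:.4%}" formatting used by the callers
    return correct / total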
def main():
    print("evaluate start")

    # set default gpu device id
    # torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    if config.deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.enabled = True
    else:
        torch.backends.cudnn.benchmark = True

    # get data with meta info (only the validation split is needed for evaluation)
    if config.data_loader_type == 'torch':
        input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data(
            config.dataset, config.data_path, config.cutout_length,
            auto_augmentation=config.auto_augmentation)
        valid_loader = torch.utils.data.DataLoader(valid_data,
                                                   batch_size=config.batch_size,
                                                   shuffle=False,
                                                   num_workers=config.workers,
                                                   pin_memory=False)
    elif config.data_loader_type == 'dali':
        input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data_dali(
            config.dataset, config.data_path, batch_size=config.batch_size,
            num_threads=config.workers)
        valid_loader = valid_data
    else:
        raise NotImplementedError

    use_aux = config.aux_weight > 0.
    if config.model_method == 'darts_NAS':
        if config.genotype is None:
            config.genotype = get_model.get_model(config.model_method, config.model_name)
        if 'imagenet' in config.dataset.lower():
            model = AugmentCNN_ImageNet(input_size, input_channels, config.init_channels,
                                        n_classes, config.layers, use_aux, config.genotype)
        else:
            model = AugmentCNN(input_size, input_channels, config.init_channels,
                               n_classes, config.layers, use_aux, config.genotype)
    elif config.model_method == 'my_model_collection':
        from models.my_searched_model import my_specialized
        if config.structure_path is None:
            _ = config.model_name.split(':')
            net_config_path = os.path.join(project_path, 'models', 'my_model_collection',
                                           _[0], _[1] + '.json')
        else:
            net_config_path = config.structure_path
        # dropout is disabled for evaluation
        model = my_specialized(num_classes=n_classes, net_config=net_config_path,
                               dropout_rate=0)
    else:
        model_fun = get_model.get_model(config.model_method, config.model_name)
        # dropout is disabled for evaluation
        model = model_fun(num_classes=n_classes, dropout_rate=0)

    # load the pretrained EMA weights; drop the FLOPs-counter buffers first
    ckpt = torch.load(config.pretrained)
    print(ckpt.keys())
    # not needed when loading a trained checkpoint:
    # model.set_bn_param(config.bn_momentum, config.bn_eps)
    # model.init_model(model_init=config.model_init)
    for _key in list(ckpt['state_dict_ema'].keys()):
        if 'total_ops' in _key or 'total_params' in _key:
            del ckpt['state_dict_ema'][_key]
    model.load_state_dict(ckpt['state_dict_ema'])
    model.cuda()

    # model size
    total_ops, total_params = flops_counter.profile(
        model, [1, input_channels, input_size, input_size])
    print("Model size = {:.3f} MB".format(total_params))
    print("Model FLOPS with input {} = {:.3f} M".format(
        str([1, input_channels, input_size, input_size]), total_ops))
    total_ops, total_params = flops_counter.profile(model, [1, 3, 224, 224])
    print("Model FLOPS with input [1,3,224,224] {:.3f} M".format(total_ops))

    model = nn.DataParallel(model).to(device)

    # criterion
    if config.label_smoothing > 0:
        from utils import LabelSmoothLoss
        criterion = LabelSmoothLoss(smoothing=config.label_smoothing).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    best_top1 = validate(valid_loader, model, criterion, 0, 0)
    print("Final best Prec@1 = {:.4%}".format(best_top1))
import time

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

from data.get_data import get_data

################################################
# Configuration settings — change as necessary #
################################################
verbose = True
timer = True
params = [{}]
train_limit = 5000
test_limit = 5000
kfolds = 3
################################################

data = get_data()

if timer:
    start_time = time.time()

if verbose:
    print("Start Training")

log_reg = LogisticRegression()
grid = GridSearchCV(log_reg, params, cv=kfolds)
grid.fit(data.train_inputs[:train_limit], data.train_outputs[:train_limit])

if verbose:
    print("End Training")

if verbose:
    print("Start Scoring")

print(grid.score(data.test_inputs[:test_limit], data.test_outputs[:test_limit]))

if verbose:
    print("End Scoring")

if timer:
    print("Elapsed time: {:.2f}s".format(time.time() - start_time))
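# The baseline script above assumes get_data() returns an object exposing four
# sliceable, array-like attributes. A hypothetical container with that shape is
# sketched below for illustration only; the real data.get_data module defines its
# own return type.
from collections import namedtuple

Dataset = namedtuple('Dataset', ['train_inputs', 'train_outputs',
                                 'test_inputs', 'test_outputs'])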
def run(opt):
    # logging.basicConfig(filename=os.path.join(opt.log_dir, opt.log_file), level=logging.INFO)
    # logger = logging.getLogger()
    # logger.addHandler(logging.StreamHandler())
    # logger = logger.info
    log = Logger(filename=os.path.join(opt.log_dir, opt.log_file), level='debug')
    logger = log.logger.info

    # Decide what attrs to train
    attr, attr_name = get_tasks(opt)

    # Generate model based on tasks
    logger('Loading models')
    model, parameters, mean, std = generate_model(opt, attr)
    # parameters[0]['lr'] = 0
    # parameters[1]['lr'] = opt.lr / 3

    logger('Loading dataset')
    train_loader, val_loader = get_data(opt, attr, mean, std)
    writer = create_summary_writer(model, train_loader, opt.log_dir)
    # has to happen after the writer is created
    model = nn.DataParallel(model, device_ids=None)

    # Learning configurations
    if opt.optimizer == 'sgd':
        optimizer = SGD(parameters, lr=opt.lr, momentum=opt.momentum,
                        weight_decay=opt.weight_decay, nesterov=opt.nesterov)
    elif opt.optimizer == 'adam':
        optimizer = Adam(parameters, lr=opt.lr, betas=opt.betas)
    else:
        raise Exception("Not supported")
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=opt.lr_patience,
                                               factor=opt.factor, min_lr=1e-6)

    # Loading checkpoint
    if opt.checkpoint:
        logger('loading checkpoint {}'.format(opt.checkpoint))
        checkpoint = torch.load(opt.checkpoint)
        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    device = 'cuda'
    loss_fns, metrics = get_losses_metrics(attr, opt.categorical_loss, opt.at, opt.at_loss)
    trainer = my_trainer(
        model, optimizer,
        lambda pred, target, epoch: multitask_loss(pred, target, loss_fns,
                                                   len(attr_name), opt.at_coe, epoch),
        device=device)
    train_evaluator = create_supervised_evaluator(
        model, metrics={'multitask': MultiAttributeMetric(metrics, attr_name)}, device=device)
    val_evaluator = create_supervised_evaluator(
        model, metrics={'multitask': MultiAttributeMetric(metrics, attr_name)}, device=device)

    # Training timer handlers
    model_timer, data_timer = Timer(average=True), Timer(average=True)
    model_timer.attach(trainer,
                       start=Events.EPOCH_STARTED,
                       resume=Events.ITERATION_STARTED,
                       pause=Events.ITERATION_COMPLETED,
                       step=Events.ITERATION_COMPLETED)
    data_timer.attach(trainer,
                      start=Events.EPOCH_STARTED,
                      resume=Events.ITERATION_COMPLETED,
                      pause=Events.ITERATION_STARTED,
                      step=Events.ITERATION_STARTED)

    # Training log/plot handlers
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter_num = (engine.state.iteration - 1) % len(train_loader) + 1
        if iter_num % opt.log_interval == 0:
            logger("Epoch[{}] Iteration[{}/{}] Sum Loss: {:.2f} Cls Loss: {:.2f} At Loss: {:.2f} "
                   "Coe: {:.2f} Model Process: {:.3f}s/batch Data Preparation: {:.3f}s/batch"
                   .format(engine.state.epoch, iter_num, len(train_loader),
                           engine.state.output['sum'], engine.state.output['cls'],
                           engine.state.output['at'], engine.state.output['coe'],
                           model_timer.value(), data_timer.value()))
            writer.add_scalar("training/loss", engine.state.output['sum'],
                              engine.state.iteration)

    # Log/Plot Learning rate
    @trainer.on(Events.EPOCH_STARTED)
    def log_learning_rate(engine):
        lr = optimizer.param_groups[-1]['lr']
        logger('Epoch[{}] Starts with lr={}'.format(engine.state.epoch, lr))
        writer.add_scalar("learning_rate", lr, engine.state.epoch)

    # Checkpointing
    @trainer.on(Events.EPOCH_COMPLETED)
    def save_checkpoint(engine):
        if engine.state.epoch % opt.save_interval == 0:
            save_file_path = os.path.join(opt.log_dir, 'save_{}.pth'.format(engine.state.epoch))
            states = {
                'epoch': engine.state.epoch,
                'arch': opt.model,
                'state_dict': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(states, save_file_path)

    # val_evaluator event handlers
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        data_list = [train_loader, val_loader]
        name_list = ['train', 'val']
        eval_list = [train_evaluator, val_evaluator]
        for data, name, evl in zip(data_list, name_list, eval_list):
            evl.run(data)
            metrics_info = evl.state.metrics["multitask"]
            for m, val in metrics_info['metrics'].items():
                writer.add_scalar(name + '_metrics/{}'.format(m), val, engine.state.epoch)
            for m, val in metrics_info['summaries'].items():
                writer.add_scalar(name + '_summary/{}'.format(m), val, engine.state.epoch)
            logger(name + ": Validation Results - Epoch: {}".format(engine.state.epoch))
            print_summar_table(logger, attr_name, metrics_info['logger'])
            # Update Learning Rate
            if name == 'train':
                scheduler.step(metrics_info['logger']['attr']['ap'][-1])

    # kick everything off
    logger('Start training')
    trainer.run(train_loader, max_epochs=opt.n_epochs)
    writer.close()
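# A plausible entry point for the run() trainer above, sketched under the assumption
# that it is launched with the same parse_opts() used by the distillation script at
# the top of this section; the repository's actual launcher may differ.
if __name__ == '__main__':
    opt = parse_opts()
    run(opt)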