def run(config):
    """Assemble the distributed (tf.distribute) training pipeline and launch it.

    Builds strategy, optimizer, scheduler, loss and dataset from `config`,
    then hands everything to `train`.
    """
    strategy, auto_tune = setStrategy(config)
    replicas = strategy.num_replicas_in_sync

    optimizer = get_optimizer(config)
    scheduler = get_scheduler(config, num_replicas_in_sync=replicas)
    loss_fn = get_loss(config)
    metric = [config.MODEL.METRIC]

    # Focal and plain categorical cross-entropy losses expect one-hot targets.
    one_hot = 'focal' in config.LOSS.NAME or config.LOSS.NAME == 'categorical_crossentropy'

    dataset = DataSet(config,
                      num_replicas_in_sync=replicas,
                      AUTO=auto_tune,
                      isOneHot=one_hot)

    train(config, dataset, optimizer, loss_fn, scheduler, metric, strategy)
def run(config):
    """Restore training state from the latest checkpoint (if any) and train.

    Single-GPU (.cuda) variant; logs to TensorBoard under TRAIN_DIR/logs.
    """
    net = get_model(config).cuda()
    loss_fn = get_loss(config)
    opt = get_optimizer(config, net.parameters())

    # Resume bookkeeping: (-1, -1) means "fresh run".
    ckpt = utils.checkpoint.get_initial_checkpoint(config)
    if ckpt is None:
        print('[*] no checkpoint found')
        last_epoch, score = -1, -1
    else:
        last_epoch, score = utils.checkpoint.load_checkpoint(config, net, ckpt)
    print('last epoch:{} score:{:.4f}'.format(last_epoch, score))

    opt.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
    sched = get_scheduler(config, opt, last_epoch)
    if last_epoch != -1:
        sched.step()

    writer = SummaryWriter(os.path.join(config.TRAIN_DIR, 'logs'))

    train_tf = transforms.Compose([Albu(), Normalize(), ToTensor()])
    val_tf = transforms.Compose([Normalize(), ToTensor()])
    train_loader = get_dataloader(config, 'train', transform=train_tf)
    test_loader = get_dataloader(config, 'val', transform=val_tf)

    train(config, net, train_loader, test_loader, loss_fn, opt, sched,
          writer, last_epoch + 1, score)
def run(config):
    """Build model/optimizer/scheduler, resume from checkpoint, and train.

    Variant with CV2 resizing in the transforms and multi_step LR resume
    handling.
    """
    net = get_model(config).to(device)
    loss_fn = get_loss(config.LOSS.NAME)
    opt = get_optimizer(config, net.parameters())

    ckpt = utils.checkpoint.get_initial_checkpoint(config)
    if ckpt is None:
        print('[*] no checkpoint found')
        last_epoch, score, loss = -1, -1, float('inf')
    else:
        last_epoch, score, loss = utils.checkpoint.load_checkpoint(
            config, net, ckpt)
    print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(
        last_epoch, score, loss))

    opt.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
    sched = get_scheduler(config, opt, last_epoch)

    # On resume, re-apply the gamma decay for every milestone already passed.
    if config.SCHEDULER.NAME == 'multi_step':
        sched_state = sched.state_dict()
        passed = sum(1 for m in sched_state['milestones'] if m < last_epoch)
        opt.param_groups[0]['lr'] *= sched_state['gamma'] ** passed
    if last_epoch != -1:
        sched.step()

    writer = SummaryWriter(os.path.join(config.TRAIN_DIR, 'logs'))

    resize = CV2_Resize(config.DATA.IMG_W, config.DATA.IMG_H)
    train_loader = get_dataloader(
        config, 'train',
        transform=transforms.Compose([Albu(), resize, Normalize(), ToTensor()]))
    val_loader = get_dataloader(
        config, 'val',
        transform=transforms.Compose([resize, Normalize(), ToTensor()]))

    train(config, net, train_loader, val_loader, loss_fn, opt, sched,
          writer, last_epoch + 1, score, loss)
def run(config):
    """Set up model, optimizer and scheduler, resume any checkpoint, and train.

    Writes plain-text train/val logs (Logger) under `config.TRAIN_DIR`
    instead of TensorBoard. Resumes epoch/score/loss from the latest
    checkpoint when one exists.
    """
    model = get_model(config).to(device)
    optimizer = get_optimizer(config, model.parameters())

    # Resume from the most recent checkpoint if present; otherwise start fresh.
    checkpoint = utils.checkpoint.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, score, loss = utils.checkpoint.load_checkpoint(config, model, checkpoint)
    else:
        print('[*] no checkpoint found')
        last_epoch, score, loss = -1, -1, float('inf')
    print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(last_epoch, score, loss))

    optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
    scheduler = get_scheduler(config, optimizer, last_epoch)

    # multi_step schedulers do not re-apply past decays on resume, so fold the
    # gamma factor for every milestone already passed into the current LR.
    if config.SCHEDULER.NAME == 'multi_step':
        state = scheduler.state_dict()  # fetch once; was queried twice before
        step_count = sum(1 for m in state['milestones'] if m < last_epoch)
        optimizer.param_groups[0]['lr'] *= state['gamma'] ** step_count
    if last_epoch != -1:
        scheduler.step()

    # Separate append-mode text logs for the train and validation phases.
    log_train = Logger()
    log_val = Logger()
    log_train.open(os.path.join(config.TRAIN_DIR, 'log_train.txt'), mode='a')
    log_val.open(os.path.join(config.TRAIN_DIR, 'log_val.txt'), mode='a')

    train_loader = get_dataloader(config, 'train', transform=Albu(config.ALBU))
    val_loader = get_dataloader(config, 'val')

    train(config, model, train_loader, val_loader, optimizer, scheduler,
          log_train, log_val, last_epoch + 1, score, loss)
def run(config):
    """Prepare model, optimizer, scheduler and data loaders, then train.

    Variant that picks segmentation vs. classification augmentation from
    `config.TASK` and logs to plain-text Logger files.
    """
    net = get_model(config).to(device)
    opt = get_optimizer(config, net.parameters())

    ckpt = utils.checkpoint.get_initial_checkpoint(config)
    if ckpt is None:
        print('[*] no checkpoint found')
        last_epoch, score, loss = -1, -1, float('inf')
    else:
        last_epoch, score, loss = utils.checkpoint.load_checkpoint(
            config, net, ckpt)
    print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(
        last_epoch, score, loss))

    opt.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
    sched = get_scheduler(config, opt, last_epoch)

    # On resume, re-apply the decay for each multi_step milestone already passed.
    if config.SCHEDULER.NAME == 'multi_step':
        sched_state = sched.state_dict()
        passed = [m for m in sched_state['milestones'] if m < last_epoch]
        opt.param_groups[0]['lr'] *= sched_state['gamma'] ** len(passed)
    if last_epoch != -1:
        sched.step()

    # Append-mode text logs, one file per phase.
    log_train = Logger()
    log_val = Logger()
    log_train.open(os.path.join(config.TRAIN_DIR, 'log_train.txt'), mode='a')
    log_val.open(os.path.join(config.TRAIN_DIR, 'log_val.txt'), mode='a')

    if config.TASK == 'seg':
        augmentation = Albu_Seg()
    else:
        augmentation = Albu_Cls()
    train_loader = get_dataloader(config, 'train', transform=augmentation)
    val_loader = get_dataloader(config, 'val')

    train(config, net, train_loader, val_loader, opt, sched,
          log_train, log_val, last_epoch + 1, score, loss)
def main():
    """Entry point for NSML training / inference.

    Relies on module-level globals visible elsewhere in the file:
    `config`, `IS_LOCAL`, `DATASET_PATH`, and the `nsml` platform API.
    In `pause` mode it hands control to nsml for inference; in `train`
    mode it preprocesses the dataset (remote only), builds folds, and
    runs the epoch loop, checkpointing locally or reporting to nsml.
    """
    seed_everything()
    # yml = 'configs/base.yml'
    # config = utils.config.load(yml)
    # pprint.pprint(config, indent=2)
    model = get_model(config).cuda()
    bind_model(model)
    args = get_args()
    if args.pause:  ## when in test (inference) mode
        print('Inferring Start...')
        nsml.paused(scope=locals())
    if args.mode == 'train':  ### when in training mode
        print('Training Start...')
        # no bias decay: exclude the no-decay parameter group from weight decay
        if config.OPTIMIZER.NO_BIAS_DECAY:
            group_decay, group_no_decay = group_weight(model)
            params = [{'params': group_decay}, {'params': group_no_decay, 'weight_decay': 0.0}]
        else:
            params = model.parameters()
        optimizer = get_optimizer(config, params)
        optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
        if config.OPTIMIZER.NO_BIAS_DECAY:
            # second param group only exists when no-bias-decay grouping is on
            optimizer.param_groups[1]['initial_lr'] = config.OPTIMIZER.LR
        ###############################################################################################
        if IS_LOCAL:
            prepare_train_directories(config)
            # NOTE(review): `yml` is only defined in the commented-out lines at the
            # top of this function, so this call raises NameError when IS_LOCAL is
            # true — confirm the intended config path before running locally.
            utils.config.save_config(yml, config.LOCAL_TRAIN_DIR)
            checkpoint = utils.checkpoint.get_initial_checkpoint(config)
            if checkpoint is not None:
                last_epoch, score, loss = utils.checkpoint.load_checkpoint(config, model, checkpoint)
            else:
                print('[*] no checkpoint found')
                last_epoch, score, loss = -1, -1, float('inf')
            print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(last_epoch, score, loss))
        else:
            # remote (nsml) runs always start from scratch
            last_epoch = -1
        ###############################################################################################
        scheduler = get_scheduler(config, optimizer, last_epoch=last_epoch)
        ###############################################################################################
        if IS_LOCAL:
            # On resume, re-apply the gamma decay for every multi_step milestone
            # already passed (a warmup wrapper nests the real scheduler).
            if config.SCHEDULER.NAME == 'multi_step':
                if config.SCHEDULER.WARMUP:
                    scheduler_dict = scheduler.state_dict()['after_scheduler'].state_dict()
                else:
                    scheduler_dict = scheduler.state_dict()
                milestones = scheduler_dict['milestones']
                step_count = len([i for i in milestones if i < last_epoch])
                optimizer.param_groups[0]['lr'] *= scheduler_dict['gamma'] ** step_count
                if config.OPTIMIZER.NO_BIAS_DECAY:
                    # NOTE(review): this adjusts 'initial_lr' while group 0 adjusts
                    # 'lr' — looks inconsistent; confirm which key is intended.
                    optimizer.param_groups[1]['initial_lr'] *= scheduler_dict['gamma'] ** step_count
            if last_epoch != -1:
                scheduler.step()
        ###############################################################################################
        # for dirname, _, filenames in os.walk(DATASET_PATH):
        #     for filename in filenames:
        #         print(os.path.join(dirname, filename))
        # if preprocessing possible
        preprocess_type = config.DATA.PREPROCESS
        cv2_size = (config.DATA.IMG_W, config.DATA.IMG_H)
        if not IS_LOCAL:
            # Preprocess each diagnosis class folder from the nsml dataset into
            # a local directory named after the preprocess type.
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'NOR'), os.path.join(preprocess_type, 'NOR'), preprocess_type, cv2_size)
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'AMD'), os.path.join(preprocess_type, 'AMD'), preprocess_type, cv2_size)
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'RVO'), os.path.join(preprocess_type, 'RVO'), preprocess_type, cv2_size)
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'DMR'), os.path.join(preprocess_type, 'DMR'), preprocess_type, cv2_size)
            data_dir = preprocess_type
            # data_dir = os.path.join(DATASET_PATH, 'train/train_data')
        else:  # IS_LOCAL: preprocessed data is assumed to already exist on disk
            data_dir = os.path.join(DATASET_PATH, preprocess_type)
        # eda
        # train_std(data_dir, preprocess_type, cv2_size)
        fold_df = split_cv(data_dir, n_splits=config.NUM_FOLDS)
        val_fold_idx = config.IDX_FOLD
        ###############################################################################################
        train_loader = get_dataloader(config, data_dir, fold_df, val_fold_idx, 'train', transform=Albu())
        val_loader = get_dataloader(config, data_dir, fold_df, val_fold_idx, 'val')
        postfix = dict()
        num_epochs = config.TRAIN.NUM_EPOCHS
        val_acc_list = []
        for epoch in range(last_epoch + 1, num_epochs):
            # Switch to the fine-tuning loss once FINETUNE_EPOCH is reached.
            if epoch >= config.LOSS.FINETUNE_EPOCH:
                criterion = get_loss(config.LOSS.FINETUNE_LOSS)
            else:
                criterion = get_loss(config.LOSS.NAME)
            train_values = train_single_epoch(config, model, train_loader, criterion, optimizer, scheduler, epoch)
            val_values = evaluate_single_epoch(config, model, val_loader, criterion, epoch)
            val_acc_list.append((epoch, val_values[2]))
            # NOTE(review): 'one_cyle_lr' looks like a typo of 'one_cycle_lr' —
            # it must match whatever name get_scheduler/config use, else the
            # per-epoch step() is applied even for the one-cycle scheduler.
            if config.SCHEDULER.NAME != 'one_cyle_lr':
                scheduler.step()
            if IS_LOCAL:
                utils.checkpoint.save_checkpoint(config, model, epoch, val_values[1], val_values[0])
            else:
                # Report epoch metrics to the nsml dashboard.
                postfix['train_loss'] = train_values[0]
                postfix['train_res'] = train_values[1]
                postfix['train_acc'] = train_values[2]
                postfix['train_sens'] = train_values[3]
                postfix['train_spec'] = train_values[4]
                postfix['val_loss'] = val_values[0]
                postfix['val_res'] = val_values[1]
                postfix['val_acc'] = val_values[2]
                postfix['val_sens'] = val_values[3]
                postfix['val_spec'] = val_values[4]
                nsml.report(**postfix, summary=True, step=epoch)
                # Format the validation result as a dotted digit string for the
                # checkpoint file name.
                val_res = '%.10f' % val_values[1]
                val_res = val_res.replace(".", "")
                val_res = val_res[:4] + '.' + val_res[4:8] + '.' + val_res[8:]
                # NOTE(review): save_name is built but unused — checkpoints are
                # saved under the epoch number instead.
                save_name = 'epoch_%02d_score%s_loss%.4f.pth' % (epoch, val_res, val_values[0])
                # nsml.save(save_name)
                nsml.save(epoch)
        # Summary of per-epoch validation accuracy.
        for e, val_acc in val_acc_list:
            print('%02d %s' % (e, val_acc))