def hp_search(trial: optuna.Trial):
    """Optuna objective: sample hyper-parameters, train for the sampled number
    of epochs, and return the last validation accuracy.

    Reads base options from the module-global ``gopt``. Reports the per-epoch
    accuracy to Optuna so the pruner can abort unpromising trials.

    Args:
        trial: the Optuna trial used to sample lr / batch size / seed / epochs.

    Returns:
        The validation accuracy of the last completed epoch.

    Raises:
        optuna.TrialPruned: when the pruner decides to stop this trial.
    """
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))
    global gopt
    opt = gopt
    # set config
    config = load_config(opt)
    config['opt'] = opt
    logger.info("%s", config)
    # set path
    set_path(config)
    # set search spaces
    # NOTE: suggest_loguniform() is deprecated in Optuna;
    # suggest_float(..., log=True) samples the same log-uniform distribution.
    lr = trial.suggest_float('lr', 1e-6, 1e-3, log=True)
    bsz = trial.suggest_categorical('batch_size', [32, 64, 128])
    seed = trial.suggest_int('seed', 17, 42)
    epochs = trial.suggest_int('epochs', 1, opt.epoch)
    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config, hp_search_bsz=bsz)
    with temp_seed(seed):
        # prepare model
        model = prepare_model(config)
        # create optimizer, scheduler, summary writer, scaler
        optimizer, scheduler, writer, scaler = prepare_osws(config, model, train_loader, hp_search_lr=lr)
        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['writer'] = writer
        config['scaler'] = scaler
        early_stopping = EarlyStopping(logger, patience=opt.patience, measure=opt.measure, verbose=1)
        # lower is better when measuring 'loss', higher is better otherwise
        best_eval_measure = float('inf') if opt.measure == 'loss' else -float('inf')
        for epoch in range(epochs):
            eval_loss, eval_acc = train_epoch(model, config, train_loader, valid_loader, epoch)
            if opt.measure == 'loss':
                eval_measure = eval_loss
            else:
                eval_measure = eval_acc
            # early stopping
            if early_stopping.validate(eval_measure, measure=opt.measure):
                break
            if opt.measure == 'loss':
                is_best = eval_measure < best_eval_measure
            else:
                is_best = eval_measure > best_eval_measure
            if is_best:
                best_eval_measure = eval_measure
                early_stopping.reset(best_eval_measure)
            early_stopping.status()
            trial.report(eval_acc, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()
    return eval_acc
def train(opt):
    """Standard training entry point.

    Builds config / data loaders / model, then runs the epoch loop with
    f1-based early stopping. `train_epoch` receives and returns the running
    best f1.
    """
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))
    # set etc
    torch.autograd.set_detect_anomaly(True)
    # set config
    config = load_config(opt)
    config['opt'] = opt
    logger.info("%s", config)
    # set path
    set_path(config)
    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config)
    with temp_seed(opt.seed):
        # prepare model
        model = prepare_model(config)
        # create optimizer, scheduler, summary writer, scaler and stash them in config
        optimizer, scheduler, writer, scaler = prepare_osws(config, model, train_loader)
        for key, value in (('optimizer', optimizer),
                           ('scheduler', scheduler),
                           ('writer', writer),
                           ('scaler', scaler)):
            config[key] = value
        # training
        early_stopping = EarlyStopping(logger, patience=opt.patience, measure='f1', verbose=1)
        local_worse_epoch = 0
        best_eval_f1 = -float('inf')
        for epoch_idx in range(opt.epoch):
            epoch_started = time.time()
            eval_loss, eval_f1, best_eval_f1 = train_epoch(
                model, config, train_loader, valid_loader, epoch_idx, best_eval_f1)
            # early stopping
            if early_stopping.validate(eval_f1, measure='f1'):
                break
            # equality means this epoch produced the new best f1
            if eval_f1 == best_eval_f1:
                early_stopping.reset(best_eval_f1)
            early_stopping.status()
def train(opt):
    """Full training loop with best-model checkpointing and manual LR decay.

    Trains for up to ``opt.epoch`` epochs with f1-based early stopping. When
    validation f1 improves, the model is saved (and, for transformer embedding
    classes, the finetuned BERT tokenizer/model are exported). The LR scheduler
    is stepped manually after f1 has failed to improve for
    ``opt.lr_decay_steps`` consecutive epochs, once past the warmup epochs.
    """
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))
    # set etc
    torch.autograd.set_detect_anomaly(True)
    # set config
    config = load_config(opt)
    config['opt'] = opt
    logger.info("%s", config)
    # set path
    set_path(config)
    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config)
    with temp_seed(opt.seed):
        # prepare model
        model = prepare_model(config)
        # create optimizer, scheduler, summary writer, scaler
        optimizer, scheduler, writer, scaler = prepare_osws(config, model, train_loader)
        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['writer'] = writer
        config['scaler'] = scaler
        # training
        early_stopping = EarlyStopping(logger, patience=opt.patience, measure='f1', verbose=1)
        local_worse_steps = 0          # consecutive epochs without f1 improvement
        prev_eval_f1 = -float('inf')   # previous epoch's f1 (for decay tracking)
        best_eval_f1 = -float('inf')   # best f1 observed so far
        for epoch_i in range(opt.epoch):
            epoch_st_time = time.time()
            eval_loss, eval_f1 = train_epoch(model, config, train_loader, valid_loader, epoch_i)
            # early stopping
            if early_stopping.validate(eval_f1, measure='f1'):
                break
            if eval_f1 > best_eval_f1:
                best_eval_f1 = eval_f1
                if opt.save_path:
                    logger.info("[Best model saved] : {:10.6f}".format(best_eval_f1))
                    save_model(config, model)
                    # save finetuned bert model/config/tokenizer
                    if config['emb_class'] in ['bert', 'distilbert', 'albert', 'roberta', 'bart', 'electra']:
                        if not os.path.exists(opt.bert_output_dir):
                            os.makedirs(opt.bert_output_dir)
                        model.bert_tokenizer.save_pretrained(opt.bert_output_dir)
                        model.bert_model.save_pretrained(opt.bert_output_dir)
                early_stopping.reset(best_eval_f1)
            early_stopping.status()
            # begin: scheduling, apply rate decay when the measure (e.g. loss/f1) keeps
            # getting worse for the configured number of decay epoch steps.
            if prev_eval_f1 >= eval_f1:
                local_worse_steps += 1
            else:
                local_worse_steps = 0
            logger.info('Scheduler: local_worse_steps / opt.lr_decay_steps = %d / %d' % (local_worse_steps, opt.lr_decay_steps))
            # decay manually only when the transformers optimizer/scheduler is not in
            # charge, and only after the warmup epochs have passed
            if not opt.use_transformers_optimizer and \
               epoch_i > opt.warmup_epoch and \
               (local_worse_steps >= opt.lr_decay_steps or early_stopping.step() > opt.lr_decay_steps):
                scheduler.step()
                local_worse_steps = 0
            # remember this epoch's f1 for the next decay comparison
            prev_eval_f1 = eval_f1
def hp_search_optuna(trial: optuna.Trial):
    """Optuna objective for hyper-parameter search under HF Accelerate.

    Samples lr / batch size / seed / epoch count, trains with f1-based early
    stopping, reports per-epoch f1 for pruning, and returns the final f1.
    Reads base arguments from the module-global ``gargs``.

    Raises:
        optuna.TrialPruned: when the pruner decides to stop this trial.
    """
    global gargs
    args = gargs
    # set config
    config = load_config(args)
    config['args'] = args
    logger.info("%s", config)
    # set path
    set_path(config)
    # create accelerator
    accelerator = Accelerator()
    config['accelerator'] = accelerator
    args.device = accelerator.device
    # set search spaces
    lr = trial.suggest_float('lr', 1e-5, 1e-3, log=True)
    bsz = trial.suggest_categorical('batch_size', [8, 16, 32, 64])
    seed = trial.suggest_int('seed', 17, 42)
    epochs = trial.suggest_int('epochs', 1, args.epoch)
    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config, hp_search_bsz=bsz)
    with temp_seed(seed):
        # prepare model
        model = prepare_model(config)
        # create optimizer, scheduler, summary writer
        model, optimizer, scheduler, writer = prepare_others(config, model, train_loader, lr=lr)
        # create secondary optimizer, scheduler (lr used while BERT layers are frozen)
        _, optimizer_2nd, scheduler_2nd, _ = prepare_others(
            config, model, train_loader, lr=args.bert_lr_during_freezing)
        train_loader = accelerator.prepare(train_loader)
        valid_loader = accelerator.prepare(valid_loader)
        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['optimizer_2nd'] = optimizer_2nd
        config['scheduler_2nd'] = scheduler_2nd
        config['writer'] = writer
        total_batch_size = args.batch_size * accelerator.num_processes * args.gradient_accumulation_steps
        logger.info("***** Running training *****")
        # NOTE(review): len(train_loader) is the number of batches, not examples — confirm intent
        logger.info(f" Num examples = {len(train_loader)}")
        logger.info(f" Num Epochs = {args.epoch}")
        logger.info(f" Instantaneous batch size per device = {args.batch_size}")
        logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
        logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}")
        logger.info(f" Total optimization steps = {args.max_train_steps}")
        early_stopping = EarlyStopping(logger, patience=args.patience, measure='f1', verbose=1)
        best_eval_f1 = -float('inf')
        for epoch in range(epochs):
            eval_loss, eval_f1, best_eval_f1 = train_epoch(
                model, config, train_loader, valid_loader, epoch, best_eval_f1)
            # early stopping
            if early_stopping.validate(eval_f1, measure='f1'):
                break
            # equality means this epoch set the new best f1 (train_epoch maintains it)
            if eval_f1 == best_eval_f1:
                early_stopping.reset(best_eval_f1)
            early_stopping.status()
            trial.report(eval_f1, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()
    return eval_f1
def train(args):
    """Accelerate-based training entry point with f1 early stopping.

    Builds the model plus a secondary optimizer/scheduler pair (lr used while
    the BERT encoder is frozen), wraps the loaders with the accelerator, logs
    the run configuration, and trains for up to ``args.epoch`` epochs.
    """
    # set etc
    torch.autograd.set_detect_anomaly(False)
    # set config
    config = load_config(args)
    config['args'] = args
    logger.info("%s", config)
    # set path
    set_path(config)
    # create accelerator
    accelerator = Accelerator()
    config['accelerator'] = accelerator
    args.device = accelerator.device
    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config)
    with temp_seed(args.seed):
        # prepare model
        model = prepare_model(config)
        # create optimizer, scheduler, summary writer
        model, optimizer, scheduler, writer = prepare_others(config, model, train_loader)
        # create secondary optimizer, scheduler
        _, optimizer_2nd, scheduler_2nd, _ = prepare_others(
            config, model, train_loader, lr=args.bert_lr_during_freezing)
        train_loader = accelerator.prepare(train_loader)
        valid_loader = accelerator.prepare(valid_loader)
        for key, value in (('optimizer', optimizer),
                           ('scheduler', scheduler),
                           ('optimizer_2nd', optimizer_2nd),
                           ('scheduler_2nd', scheduler_2nd),
                           ('writer', writer)):
            config[key] = value
        total_batch_size = args.batch_size * accelerator.num_processes * args.gradient_accumulation_steps
        logger.info("***** Running training *****")
        logger.info(f" Num examples = {len(train_loader)}")
        logger.info(f" Num Epochs = {args.epoch}")
        logger.info(f" Instantaneous batch size per device = {args.batch_size}")
        logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
        logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}")
        logger.info(f" Total optimization steps = {args.max_train_steps}")
        # training
        early_stopping = EarlyStopping(logger, patience=args.patience, measure='f1', verbose=1)
        local_worse_epoch = 0
        best_eval_f1 = -float('inf')
        for epoch_no in range(args.epoch):
            epoch_started = time.time()
            eval_loss, eval_f1, best_eval_f1 = train_epoch(
                model, config, train_loader, valid_loader, epoch_no, best_eval_f1)
            # early stopping
            if early_stopping.validate(eval_f1, measure='f1'):
                break
            # equality means this epoch produced the new best f1
            if eval_f1 == best_eval_f1:
                early_stopping.reset(best_eval_f1)
            early_stopping.status()
def train(opt):
    """Training loop with configurable measure (loss or accuracy) for early
    stopping; optionally reports metrics to NNI during hyper-parameter search.
    """
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))
    # set etc
    torch.autograd.set_detect_anomaly(True)
    # set config
    config = load_config(opt)
    config['opt'] = opt
    logger.info("%s", config)
    # set path
    set_path(config)
    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config)
    with temp_seed(opt.seed):
        # prepare model
        model = prepare_model(config)
        # create optimizer, scheduler, summary writer, scaler
        optimizer, scheduler, writer, scaler = prepare_osws(config, model, train_loader)
        for key, value in (('optimizer', optimizer),
                           ('scheduler', scheduler),
                           ('writer', writer),
                           ('scaler', scaler)):
            config[key] = value
        # training
        early_stopping = EarlyStopping(logger, patience=opt.patience, measure=opt.measure, verbose=1)
        local_worse_epoch = 0
        minimizing = (opt.measure == 'loss')
        best_eval_measure = float('inf') if minimizing else -float('inf')
        for epoch_no in range(opt.epoch):
            epoch_started = time.time()
            eval_loss, eval_acc, best_eval_measure = train_epoch(
                model, config, train_loader, valid_loader, epoch_no, best_eval_measure)
            # for nni
            if opt.hp_search_nni:
                nni.report_intermediate_result(eval_acc)
                logger.info('[eval_acc] : %g', eval_acc)
                logger.info('[Pipe send intermediate result done]')
            eval_measure = eval_loss if minimizing else eval_acc
            # early stopping
            if early_stopping.validate(eval_measure, measure=opt.measure):
                break
            # equality means this epoch produced the new best measure
            if eval_measure == best_eval_measure:
                early_stopping.reset(best_eval_measure)
            early_stopping.status()
    # for nni
    if opt.hp_search_nni:
        nni.report_final_result(eval_acc)
        logger.info('[Final result] : %g', eval_acc)
        logger.info('[Send final result done]')