def test_pytorch_lightning_pruning_callback_monitor_is_invalid() -> None:
    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = study.ask()
    callback = PyTorchLightningPruningCallback(trial, "InvalidMonitor")
    trainer = pl.Trainer(
        max_epochs=1,
        enable_checkpointing=False,
        callbacks=[callback],
    )
    model = Model()

    with pytest.warns(UserWarning):
        callback.on_validation_end(trainer, model)
def __call__(self, trial: optuna.trial.Trial) -> float:
    data = TreeDataModule(
        self._filename,
        batch_size=trial.suggest_int("batch_size", 32, 160, 32),
    )
    kwargs = {
        "lstm_size": trial.suggest_categorical("lstm_size", [512, 1024, 2048]),
        "dropout_prob": trial.suggest_float("dropout", 0.1, 0.5, step=0.1),
        "learning_rate": trial.suggest_float("lr", 1e-3, 1e-1, log=True),
        "weight_decay": trial.suggest_float("weight_decay", 1e-3, 1e-1, log=True),
    }
    model = RouteDistanceModel(**kwargs)
    gpus = int(torch.cuda.is_available())
    pruning_callback = PyTorchLightningPruningCallback(trial, monitor="val_monitor")
    trainer = Trainer(
        gpus=gpus,
        logger=True,  # True selects the default TensorBoardLogger
        checkpoint_callback=False,
        callbacks=[pruning_callback],  # type: ignore
        max_epochs=EPOCHS,
    )
    trainer.fit(model, datamodule=data)
    return trainer.callback_metrics["val_monitor"].item()
def objective(trial: optuna.trial.Trial) -> float:
    # We optimize the number of layers, hidden units in each layer and dropouts.
    n_layers = trial.suggest_int("n_layers", 1, 3)
    dropout = trial.suggest_float("dropout", 0.2, 0.5)
    output_dims = [
        trial.suggest_int("n_units_l{}".format(i), 4, 128, log=True) for i in range(n_layers)
    ]

    model = LightningNet(dropout, output_dims)
    datamodule = MNISTDataModule(data_dir=DIR, batch_size=BATCHSIZE)

    trainer = pl.Trainer(
        logger=True,
        limit_val_batches=PERCENT_VALID_EXAMPLES,
        checkpoint_callback=False,
        max_epochs=EPOCHS,
        gpus=-1 if torch.cuda.is_available() else None,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="val_acc")],
    )
    hyperparameters = dict(n_layers=n_layers, dropout=dropout, output_dims=output_dims)
    trainer.logger.log_hyperparams(hyperparameters)
    trainer.fit(model, datamodule=datamodule)

    return trainer.callback_metrics["val_acc"].item()
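# A minimal driver sketch for an objective like the one above, assuming the
# surrounding script defines `objective` as written: pruning only takes effect
# when the study is created with a pruner and the objective is passed to
# `study.optimize`.
import optuna

if __name__ == "__main__":
    study = optuna.create_study(
        direction="maximize",  # "val_acc" is maximized
        pruner=optuna.pruners.MedianPruner(),
    )
    study.optimize(objective, n_trials=100, timeout=600)
    print("Best trial:", study.best_trial.params)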
def objective(trial: optuna.Trial):
    # Filenames for each trial must be made unique in order to access each checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(MODEL_DIR, "trial_{}".format(trial.number), "{epoch}"),
        monitor="val_acc",
    )

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
    metrics_callback = MetricsCallback()
    trainer = pl.Trainer(
        logger=True,
        checkpoint_callback=checkpoint_callback,
        max_epochs=50,
        gpus=args.gpu if torch.cuda.is_available() else None,
        callbacks=[metrics_callback],
        early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="val_acc"),
    )

    model = LeNet5(trial)
    bsz = trial.suggest_int("bsz", 32, 128, 32)
    train_loader = DataLoader(data_train, batch_size=bsz, shuffle=True)
    val_loader = DataLoader(data_val, batch_size=1)
    trainer.fit(model, train_dataloader=train_loader, val_dataloaders=val_loader)

    return metrics_callback.metrics[-1]["val_acc"]
def test_pytorch_lightning_pruning_callback_monitor_is_invalid() -> None:
    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = create_running_trial(study, 1.0)
    callback = PyTorchLightningPruningCallback(trial, "InvalidMonitor")
    trainer = pl.Trainer(
        min_epochs=0,  # Required to fire the callback after the first epoch.
        max_epochs=1,
        checkpoint_callback=False,
        callbacks=[callback],
    )
    model = Model()

    with pytest.warns(UserWarning):
        callback.on_validation_end(trainer, model)
def objective(trial):
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(MODEL_DIR, "trial_{}".format(trial.number)), monitor="accuracy"
    )
    trainer = pl.Trainer(
        auto_select_gpus=True,
        gpus=1,
        precision=16,
        profiler=False,
        max_epochs=1,
        callbacks=[
            pl.callbacks.ProgressBar(),
            PyTorchLightningPruningCallback(trial, monitor="val_acc"),
        ],
        automatic_optimization=True,
        enable_pl_optimizer=True,
        logger=logger,
        accelerator="ddp",
        plugins="ddp_sharded",
    )

    model = LitModel(trial, num_classes=num_classes)
    dm = ImDataModule(trial, df, batch_size=batch_size, num_classes=num_classes, img_size=img_size)
    trainer.fit(model, dm)
    return trainer.callback_metrics["val_acc_step"].item()
def objective(trial):
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join('checkpoints', "trial_{}".format(trial.number)), monitor="val_loss"
    )
    metrics_callback = MetricsCallback()
    run_name = create_run_name(args)
    logger = TensorBoardLogger(save_dir='runs_pl_temp/', name=run_name)

    trainer = pl.Trainer(
        gpus=1,
        logger=logger,
        max_epochs=args.max_epochs,
        gradient_clip_val=trial.suggest_uniform("clip", 0.1, 0.9),
        checkpoint_callback=checkpoint_callback,
        early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="val_loss"),
        callbacks=[metrics_callback],
    )

    mlp = MLPGenreClassifierModel(args, trial)
    trainer.fit(mlp)
    return metrics_callback.metrics[-1]["val_loss"].item()
def optimize(trial: optuna.Trial, data_dict):
    gts = PurgedGroupTimeSeriesSplit(n_splits=5, group_gap=5)
    input_size = data_dict['data'].shape[-1]
    output_size = 1
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join('models/', "trial_resnet_{}".format(trial.number)),
        monitor="val_mse",
        mode='min',
    )
    logger = MetricsCallback()
    metrics = []
    sizes = []
    # trial_file = 'HPO/nn_hpo_2021-01-05.pkl'
    trial_file = None
    p = create_param_dict(trial, trial_file)
    p['batch_size'] = trial.suggest_int('batch_size', 8000, 15000)

    for i, (train_idx, val_idx) in enumerate(gts.split(data_dict['data'], groups=data_dict['era'])):
        model = Classifier(input_size, output_size, params=p)
        # model.apply(init_weights)
        dataset = FinData(data=data_dict['data'], target=data_dict['target'], era=data_dict['era'])
        dataloaders = create_dataloaders(
            dataset,
            indexes={'train': train_idx, 'val': val_idx},
            batch_size=p['batch_size'],
        )
        es = EarlyStopping(monitor='val_mse', patience=10, min_delta=0.0005, mode='min')
        trainer = pl.Trainer(
            logger=False,
            max_epochs=500,
            gpus=1,
            callbacks=[
                checkpoint_callback,
                logger,
                PyTorchLightningPruningCallback(trial, monitor='val_mse'),
                es,
            ],
            precision=16,
        )
        trainer.fit(model, train_dataloader=dataloaders['train'], val_dataloaders=dataloaders['val'])
        val_loss = logger.metrics[-1]['val_loss'].item()
        metrics.append(val_loss)
        sizes.append(len(train_idx))

    metrics_mean = weighted_mean(metrics, sizes)
    return metrics_mean
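# `weighted_mean` is not defined in the snippet above. A plausible minimal
# implementation (an assumption, not the author's code) that weights each
# fold's loss by the size of its training split:
def weighted_mean(metrics, sizes):
    total = sum(sizes)
    return sum(metric * size for metric, size in zip(metrics, sizes)) / total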
def __call__(self, trial):
    # Filenames for each trial must be made unique in order to access each checkpoint.
    ckpt_path = os.path.join(
        self.hparams.output,
        trial.study.study_name,
        "trial_{}".format(trial.number),
        "{epoch:03d}",
    )
    checkpoint_callback = pl.callbacks.ModelCheckpoint(ckpt_path, monitor=self.monitor_metric)

    # Set the hyperparameters under optimization.
    hparams = copy.copy(self.hparams)
    for k, v in self.get_hparams(trial).items():
        setattr(hparams, k, v)

    model = self.model_cls(hparams)
    model.set_dataset(self.dataset)

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
    metrics_callback = MetricsCallback()

    # Set up the arguments required for integrating with Optuna.
    _targs = dict(
        logger=False,
        checkpoint_callback=checkpoint_callback,
        callbacks=[metrics_callback],
        early_stop_callback=PyTorchLightningPruningCallback(trial, monitor=self.monitor_metric),
    )
    _targs.update(self.targs)
    trainer = pl.Trainer(**_targs)

    trainer.fit(model)
    return metrics_callback.metrics[-1][self.monitor_metric]
def objective(trial):
    # PyTorch Lightning will try to restore model parameters from previous trials if checkpoint
    # filenames match. Therefore, the filenames for each trial must be made unique.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(MODEL_DIR, 'trial_{}'.format(trial.number)), save_best_only=False
    )

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We create a simple logger instead that holds the log in memory so that the
    # final accuracy can be obtained after optimization. When using the default logger, the
    # final accuracy could be stored in an attribute of the `Trainer` instead.
    logger = DictLogger(trial.number)

    trainer = pl.Trainer(
        logger=logger,
        val_percent_check=PERCENT_TEST_EXAMPLES,
        checkpoint_callback=checkpoint_callback,
        max_nb_epochs=EPOCHS,
        gpus=1 if torch.cuda.is_available() else None,
        early_stop_callback=PyTorchLightningPruningCallback(trial, monitor='accuracy'),
    )

    model = LightningNet(trial)
    trainer.fit(model)

    return logger.metrics[-1]['accuracy']
def create_trainer(cfg, tags=None, trial=None, callbacks=None):
    if trial:
        checkpoint_callback = pl.callbacks.ModelCheckpoint(f'trial#{trial.number}')
        new_callbacks = [PyTorchLightningPruningCallback(trial, 'val_loss')]
        if callbacks:
            new_callbacks.extend(callbacks)
        trainer = pl.Trainer(
            logger=False,
            callbacks=new_callbacks,
            checkpoint_callback=checkpoint_callback,
            max_epochs=400,
            progress_bar_refresh_rate=0,
            weights_summary=None,
        )
    else:
        trainer = pl.Trainer(
            logger=NeptuneLogger(
                project_name='yoniosin/amygdala',
                tags=tags,
                params=flatten(cfg, reducer='path'),
            ),
            max_epochs=cfg.learner.max_epochs,
            # callbacks=[pl.callbacks.EarlyStopping('val_loss', patience=200)]
            # fast_dev_run=True
        )

    return trainer
def objective(trial):
    # Filenames for each trial must be made unique in order to access each checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(MODEL_DIR, "trial_{}".format(trial.number), "{epoch}"), monitor="val_acc"
    )

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
    metrics_callback = MetricsCallback()
    trainer = pl.Trainer(
        logger=False,
        limit_val_batches=PERCENT_VALID_EXAMPLES,
        checkpoint_callback=checkpoint_callback,
        max_epochs=EPOCHS,
        gpus=1 if torch.cuda.is_available() else None,
        callbacks=[
            metrics_callback,
            PyTorchLightningPruningCallback(trial, monitor="val_acc"),
        ],
    )

    model = LightningNet(trial)
    trainer.fit(model)

    return metrics_callback.metrics[-1]["val_acc"].item()
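# Several snippets rely on a `MetricsCallback` that is never shown here. A
# minimal sketch in the style of older Optuna examples (an assumption about
# its definition): it simply records `trainer.callback_metrics` at the end of
# every validation run.
from pytorch_lightning import Callback

class MetricsCallback(Callback):
    """Collects `trainer.callback_metrics` after each validation epoch."""

    def __init__(self):
        super().__init__()
        self.metrics = []

    def on_validation_end(self, trainer, pl_module):
        self.metrics.append(trainer.callback_metrics)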
def objective(trial, args):
    params = get_trial_params(trial)
    params['hidden_size'] = 2 ** params['hidden_size']
    params['acc_grads'] = 2 ** params['acc_grads']
    early_stopper = EarlyStopping(monitor='val_loss', min_delta=0.005, patience=3, mode='min')
    callbacks = [early_stopper, PyTorchLightningPruningCallback(trial, monitor="val_loss")]
    if args.model_type == 'attnlstm':
        params['attn_width'] = trial.suggest_int("attn_width", 3, 64)

    if 'split' in args.val_mode:
        dataset_hour = args.data.split('_')[-1]
        logger = MLFlowLogger(experiment_name=f'Optuna_{dataset_hour}h_{args.val_mode[-1]}_split')
        print(f'Optuna_{dataset_hour}_{args.val_mode[-1]}_split')
        val_losses = []
        for _split_id in range(int(args.val_mode[-1])):
            print(f"Split {_split_id} Trial {trial.number}")
            args.__dict__["split_id"] = 0
            for key in params:
                args.__dict__[str(key)] = params.get(key)
            model = LitLSTM(args)
            trainer = Trainer(
                logger=logger,
                callbacks=callbacks,
                **get_trainer_params(args),
            )
            logger.log_hyperparams(model.args)
            args.__dict__["val_mode"] = args.val_mode
            args.__dict__["split_id"] = _split_id
            model._get_data(args, data_mode='init')
            trainer.fit(model)
            trainer.test(model, test_dataloaders=model.test_dataloader())
            # logger.finalize()
            val_losses.append(model.metrics['val_loss'])

        # log mean val loss for later retrieval of best model
        mean_val_loss = torch.stack(val_losses).mean()
        logger.log_metrics({"mean_val_loss": mean_val_loss}, step=0)
        logger.finalize()
        return mean_val_loss
    elif args.val_mode == 'full':
        logger = MLFlowLogger(experiment_name='Optuna_full')
        for key in params:
            args.__dict__[str(key)] = params.get(key)
        model = LitLSTM(args)
        trainer = Trainer(
            logger=logger,
            callbacks=callbacks,
            **get_trainer_params(args),
        )
        logger.log_hyperparams(model.args)
        trainer.fit(model)
        trainer.test(model, test_dataloaders=model.test_dataloader())
        model.save_preds_and_targets(to_disk=True)
        logger.finalize()
        return model.metrics['val_loss']
def build_trainer(run_config, hyperparameters, trial=None):
    '''Set up the Optuna trainer.'''
    if 'progress_bar_refresh_rate' in hyperparameters:
        p_refresh = hyperparameters['progress_bar_refresh_rate']
    else:
        p_refresh = 5

    # Set epochs, GPUs, gradient clipping, etc.
    # If 'no_gpu' is in run_config, then use the CPU.
    trainer_kwargs = {
        'max_epochs': hyperparameters['max_epochs'],
        "gpus": 0 if 'no_gpu' in run_config else 1,
        "num_sanity_val_steps": 0,
        "progress_bar_refresh_rate": p_refresh,
        "gradient_clip_val": hyperparameters['grad_clip'],
    }

    # Set the automatic learning-rate finder parameter.
    if 'auto_lr_find' in hyperparameters and hyperparameters['auto_lr_find']:
        trainer_kwargs['auto_lr_find'] = hyperparameters['auto_lr_find']

    # Create the TensorBoard logger.
    lgdir = os.path.join(run_config['tb']['dir_full'], run_config['tb']['name'])
    if not os.path.exists(lgdir):
        os.makedirs(lgdir)
    logger = TensorBoardLogger(
        run_config['tb']['dir_full'],
        name=run_config['tb']['name'],
        version="version_" + str(random.randint(0, 10000000)),
    )
    if not os.path.exists(logger.log_dir):
        os.makedirs(logger.log_dir)
    print("Tensorboard logging at ", logger.log_dir)
    trainer_kwargs["logger"] = logger

    # Save the top three model checkpoints.
    trainer_kwargs["checkpoint_callback"] = ModelCheckpoint(
        filepath=os.path.join(
            logger.log_dir,
            "{epoch}-{val_micro_f1:.2f}-{val_acc:.2f}-{val_auroc:.2f}",
        ),
        save_top_k=3,
        verbose=True,
        monitor=run_config['optuna']['monitor_metric'],
        mode='max',
    )

    # If pruning is enabled, use the PyTorch Lightning pruning callback.
    if run_config["optuna"]['pruning']:
        trainer_kwargs['early_stop_callback'] = PyTorchLightningPruningCallback(
            trial, monitor=run_config['optuna']['monitor_metric']
        )

    trainer = pl.Trainer(**trainer_kwargs)
    return trainer, trainer_kwargs, logger.log_dir
def objective(trial, **kwargs):
    # # Categorical parameter
    # optimizer = trial.suggest_categorical('optimizer', ['MomentumSGD', 'Adam'])
    # # Int parameter
    # num_layers = trial.suggest_int('num_layers', 1, 3)

    # # Uniform parameter
    dropout_prob = trial.suggest_uniform('dropout_prob', 0.0, 1.0)

    # # Loguniform parameter
    # learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    # # Discrete-uniform parameter
    # drop_path_rate = trial.suggest_discrete_uniform('drop_path_rate', 0.0, 1.0, 0.01)

    print("dropout_prob: {}".format(dropout_prob))
    kwargs.update(dropout_prob=dropout_prob)

    # Filenames for each trial must be made unique in order to access each checkpoint.
    # checkpoint_callback = pl.callbacks.ModelCheckpoint(
    #     os.path.join(MODEL_DIR, "trial_{}".format(trial.number), "{epoch}"), monitor="val_acc"
    # )

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
    metrics_callback = MetricsCallback()

    """
    Main training routine specific for this project.
    """
    # ------------------------
    # 1 INIT a model and the LIGHTNING Experiment class
    # ------------------------
    model = MLP(**kwargs)
    experiment = ImageClassificationExperiment(model=model, **kwargs)

    # ------------------------
    # 2 INIT TRAINER
    # ------------------------
    kwargs.update({
        "logger": False,
        # "checkpoint_callback": checkpoint_callback,
        "callbacks": [metrics_callback],
        "early_stop_callback": PyTorchLightningPruningCallback(trial, monitor="val_loss"),
    })
    valid_kwargs = inspect.signature(pl.Trainer.__init__).parameters
    trainer_kwargs = dict((name, kwargs[name]) for name in valid_kwargs if name in kwargs)
    trainer = pl.Trainer(**trainer_kwargs)

    # ------------------------
    # 3 START TRAINING
    # ------------------------
    trainer.fit(experiment)

    return metrics_callback.metrics[-1]["val_loss"].item()
def get_callbacks(trial):
    metrics_callback = MetricsCallback()
    early_stop_callback = pl.callbacks.EarlyStopping(
        monitor='val_R%_@1', patience=5, strict=True, verbose=False, mode='max'
    )
    return [
        metrics_callback,
        early_stop_callback,
        PyTorchLightningPruningCallback(trial, monitor="val_R%_@1"),
    ]
def objective(trial):
    config = Module.Config()
    config.output_dir = None
    module = Module(config)

    # Filenames for each trial must be made unique in order to access each
    # checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join("TODO", "trial_{}".format(trial.number), "{epoch}"),
        monitor="val_acc",
    )

    # The default logger in PyTorch Lightning writes to event files to be
    # consumed by TensorBoard. We don't use any logger here as it requires us
    # to implement several abstract methods. Instead we set up a simple
    # callback that saves metrics from each validation step.
    metrics_callback = MetricsCallback()
    trainer = pl.Trainer(
        logger=False,
        checkpoint_callback=checkpoint_callback,
        max_epochs=21,
        gpus=1,
        callbacks=[metrics_callback],
        early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="accuracy"),
    )

    logger = CustomLogger(
        save_dir=config.training_config.output_dir,
        name=config.training_config.experiment_name,
        version=f"seed={config.training_config.seed}",
    )
    period = max(1, config.training_config.n_epochs // 5)
    # Note: this second Trainer replaces the one constructed above.
    trainer = pl.Trainer(
        gpus=1,
        gradient_clip_val=50.0,
        max_epochs=config.training_config.n_epochs,
        check_val_every_n_epoch=period,
        num_sanity_val_steps=0,
        checkpoint_callback=pl.callbacks.ModelCheckpoint(
            filepath=os.path.join(logger.log_dir, "checkpoints"),
            save_top_k=-1,
            save_last=True,
            run_eval=config.training_config.run_eval,
        ),
        logger=logger,
    )
    trainer.fit(module)
    return metrics_callback.metrics[-1]["val_acc"].item()
def objective(trial: optuna.trial.Trial) -> float:
    trainer = pl.Trainer(
        max_epochs=2,
        enable_checkpointing=False,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="accuracy")],
    )
    model = Model()
    trainer.fit(model)
    return 1.0
def objective(trial): """ Optuna function to optimize See https://github.com/optuna/optuna/blob/master/examples/pytorch_lightning_simple.py """ # sample hidden_size_exp = trial.suggest_int("hidden_size_exp", 1, 8) hidden_size = 2**hidden_size_exp layers = trial.suggest_int("layers", 1, 12) # Load model pt_model = m_fn(xs, ys, hidden_size, layers) model_name = type(pt_model).__name__ # Wrap in lightning patience = 2 model = PL_MODEL( pt_model, lr=3e-4, patience=patience, ).to(device) save_dir = f"../outputs/{timestamp}/{dataset_name}_{model_name}/{trial.number}" Path(save_dir).mkdir(exist_ok=True, parents=True) trainer = pl.Trainer( # Training length min_epochs=2, max_epochs=40, limit_train_batches=max_iters // batch_size, limit_val_batches=max_iters // batch_size // 5, # Misc gradient_clip_val=20, terminate_on_nan=True, # GPU gpus=1, amp_level='O1', precision=16, # Callbacks default_root_dir=save_dir, logger=False, callbacks=[ EarlyStopping(monitor='loss/val', patience=patience * 2), PyTorchLightningPruningCallback(trial, monitor="loss/val") ], ) trainer.fit(model, dl_train, dl_val) # Run on all val data, using test mode r = trainer.test(model, test_dataloaders=dl_val, verbose=False) return r[0]['loss/test']
def objective(trial):
    # sample
    hidden_size_exp = trial.suggest_int("hidden_size_exp", 2, 8)
    hidden_size = 2 ** hidden_size_exp
    layers = trial.suggest_int("layers", 2, 12)

    # Load model
    pt_model = m_fn(xs, ys, hidden_size, layers)
    model_name = type(pt_model).__name__

    # Wrap in lightning
    patience = 2
    model = PL_MODEL(
        pt_model,
        lr=3e-4,
        patience=patience,
        weight_decay=4e-5,
    ).to(device)

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
    # metrics_callback = MetricsCallback()

    save_dir = f"../outputs/{timestamp}/{dataset_name}_{model_name}/{trial.number}"
    Path(save_dir).mkdir(exist_ok=True, parents=True)
    trainer = pl.Trainer(
        # Training length
        min_epochs=2,
        max_epochs=100,
        limit_train_batches=max_iters // batch_size,
        limit_val_batches=max_iters // batch_size // 5,
        # Misc
        gradient_clip_val=20,
        terminate_on_nan=True,
        # GPU
        gpus=1,
        amp_level='O1',
        precision=16,
        # Callbacks
        default_root_dir=save_dir,
        logger=False,
        callbacks=[
            # metrics_callback,
            EarlyStopping(monitor='loss/val', patience=patience * 2),
            PyTorchLightningPruningCallback(trial, monitor="loss/val"),
        ],
    )
    trainer.fit(model, dl_train, dl_val)

    # Run on all val data, using test mode
    r = trainer.test(model, test_dataloaders=dl_val, verbose=False)
    return r[0]['loss/test']
def objective(trial: optuna.trial.Trial) -> float:
    trainer = pl.Trainer(
        max_epochs=1,
        accelerator="ddp_cpu",
        num_processes=2,
        checkpoint_callback=False,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="accuracy")],
    )
    model = ModelDDP()
    trainer.fit(model)
    return 1.0
def objective(trial):
    # type: (optuna.trial.Trial) -> float

    trainer = pl.Trainer(
        early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="accuracy"),
        min_epochs=0,  # Required to fire the callback after the first epoch.
        max_epochs=2,
        checkpoint_callback=False,
    )

    model = Model()
    trainer.fit(model)
    return 1.0
def __call__(self, trial):
    # The default logger in PyTorch Lightning writes to event files
    # to be consumed by TensorBoard. We don't use any logger here as
    # it requires us to implement several abstract methods. Instead
    # we set up a simple callback that saves metrics from each
    # validation step.
    metrics_callback = MetricsCallback()

    # Define parameters
    parameters = {
        'n_input': self.n_input,
        'n_classes': self.n_classes,
        'n_layers': trial.suggest_int('n_layers', *self.bounds['n_layers']),
        'dropout': trial.suggest_uniform('dropout', *self.bounds['dropout']),
        'batch_size': trial.suggest_int('batch_size', *self.bounds['batch_size']),
        'learning_rate': trial.suggest_float('learning_rate', *self.bounds['learning_rate'], log=True),
        'max_epochs': trial.suggest_int('max_epochs', *self.bounds['max_epochs']),
    }
    for i in range(parameters['n_layers']):
        parameters['n_units_l{}'.format(i)] = trial.suggest_int(
            'n_units_l{}'.format(i), *self.bounds['n_units_l'], log=True)

    # Construct trainer object and train
    trainer = Trainer(
        logger=False,
        checkpoint_callback=False,
        distributed_backend='dp',
        max_epochs=parameters['max_epochs'],
        verbose=False,
        gpus=-1 if self.use_gpu else None,
        callbacks=[metrics_callback],
        early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="val_loss"),
    )
    model = LightningNet(parameters, self.data)
    trainer.fit(model)

    return metrics_callback.metrics[-1]["val_loss"]
def objective(trial):
    # type: (optuna.trial.Trial) -> float

    trainer = pl.Trainer(
        early_stop_callback=PyTorchLightningPruningCallback(trial, monitor='accuracy'),
        min_nb_epochs=0,  # Required to fire the callback after the first epoch.
        max_nb_epochs=2,
    )
    trainer.checkpoint_callback = None  # Disable unrelated checkpoint callbacks.

    model = Model()
    trainer.fit(model)
    return 1.0
def objective(trial: Trial):
    model = EEGNetHPO(trial)
    data = EEGData()
    checkpoint_callback = pl.callbacks.ModelCheckpoint(f'trial#{trial.number}')
    metrics_callback = MetricCallback()
    trainer = pl.Trainer(
        logger=False,
        callbacks=[metrics_callback, PyTorchLightningPruningCallback(trial, 'val_loss')],
        checkpoint_callback=checkpoint_callback,
        max_epochs=400,
        progress_bar_refresh_rate=0,
        weights_summary=None,
    )
    trainer.fit(model, datamodule=data)
    return metrics_callback.metric
def objective(trial: optuna.Trial):
    log_dir = os.path.join(args.log_dir, 'trial_{}'.format(trial.number))
    checkpoint_callback = pl.callbacks.ModelCheckpoint(dirpath=log_dir, monitor='val_ce', mode='min')
    data = SemEvalDataModule(
        path_train=args.path_train,
        path_val=args.path_val,
        batch_size=trial.suggest_categorical('batch_size', choices=[16, 32, 64]),
        num_workers=args.workers,
    )
    data.prepare_data()
    data.setup('fit')

    epochs = trial.suggest_categorical('epochs', choices=[3, 4, 5])
    lr_bert = trial.suggest_loguniform('lr_bert', 1e-6, 1e-4)
    lr_class = trial.suggest_loguniform('lr_class', 1e-5, 1e-3)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-3, 1e-1)
    total_steps = epochs * len(data.data_train)
    effective_steps = total_steps // (max(args.gpus, 1) * args.num_nodes * args.accumulate_grad_batches)
    model = SentBert(
        out_classes=3,
        lr_bert=lr_bert,
        lr_class=lr_class,
        weight_decay=weight_decay,
        train_steps=effective_steps,
    )

    metrics_callback = MetricsCallback()
    pruning_callback = PyTorchLightningPruningCallback(trial, monitor='val_ce')
    trainer = pl.Trainer.from_argparse_args(
        args,
        default_root_dir=args.log_dir,
        max_epochs=epochs,
        checkpoint_callback=True,
        accelerator='ddp',
        auto_select_gpus=True,
        num_sanity_val_steps=0,
        profiler='simple',
        callbacks=[checkpoint_callback, metrics_callback, pruning_callback],
    )
    trainer.fit(model=model, datamodule=data)
    return metrics_callback.metrics[-1]['val_ce'].item()
def objective_for_binary_unet(args, trial: optuna.trial.Trial):
    args.lr = trial.suggest_loguniform("lr", low=1e-5, high=1e-2)
    args.edge_weight = trial.suggest_uniform("edge_weight", low=1, high=5)
    args.wf = trial.suggest_int("wf", low=2, high=4)
    args.depth = trial.suggest_int("depth", low=4, high=6)

    pl_pruning_callback = PyTorchLightningPruningCallback(trial, "val/f1_score")
    ckpt_callback = train_binary_unet_model(args, callbacks=[pl_pruning_callback])

    best_f1_score = ckpt_callback.best_model_score.detach().cpu().numpy().item()
    trial.set_user_attr("best_val_f1", best_f1_score)
    trial.set_user_attr("best_model_path", ckpt_callback.best_model_path)
    return best_f1_score
def objective(trial: optuna.trial.Trial) -> float:
    dataset = wds.WebDataset("/run/media/jacob/data/FACT_Dataset/fact-gamma-10-{0000..0062}.tar").shuffle(20000).decode()
    dataset_2 = wds.WebDataset("/run/media/jacob/data/FACT_Dataset/fact-proton-10-{0000..0010}.tar").shuffle(20000).decode()
    test_dataset_2 = wds.WebDataset("/run/media/jacob/data/FACT_Dataset/fact-gamma-10-{0063..0072}.tar").decode()
    test_dataset = wds.WebDataset("/run/media/jacob/data/FACT_Dataset/fact-proton-10-{0011..0013}.tar").decode()

    dataset = SampleEqually([dataset, dataset_2])
    test_dataset = SampleEqually([test_dataset_2, test_dataset])

    train_loader = DataLoader(dataset, num_workers=16, batch_size=4, pin_memory=True)
    test_loader = DataLoader(test_dataset, num_workers=4, batch_size=1, pin_memory=True)

    # We optimize the sampling ratios and radii, the fully connected layer sizes, and dropout.
    config = {
        "sample_ratio_one": trial.suggest_uniform("sample_ratio_one", 0.1, 0.9),
        "sample_radius_one": trial.suggest_uniform("sample_radius_one", 0.1, 0.9),
        "sample_max_neighbor": trial.suggest_int("sample_max_neighbor", 8, 72),
        "sample_ratio_two": trial.suggest_uniform("sample_ratio_two", 0.1, 0.9),
        "sample_radius_two": trial.suggest_uniform("sample_radius_two", 0.1, 0.9),
        "fc_1": trial.suggest_int("fc_1", 128, 256),
        "fc_1_out": trial.suggest_int("fc_1_out", 32, 128),
        "fc_2_out": trial.suggest_int("fc_2_out", 16, 96),
        "dropout": trial.suggest_uniform("dropout", 0.1, 0.9),
    }
    num_classes = 2

    import pytorch_lightning as pl
    model = LitPointNet2(num_classes, lr=0.0001, config=config)

    trainer = pl.Trainer(
        logger=True,
        limit_val_batches=10000,
        limit_train_batches=10000,
        checkpoint_callback=False,
        auto_lr_find=True,
        max_epochs=20,
        gpus=1,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="val/loss")],
    )
    trainer.logger.log_hyperparams(config)
    trainer.tune(model=model, train_dataloader=train_loader, val_dataloaders=test_loader)
    trainer.fit(model=model, train_dataloader=train_loader, val_dataloaders=test_loader)

    return trainer.callback_metrics["val/loss"].item()
def objective(trial):
    # Filenames for each trial must be made unique in order to access each checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(MODEL_DIR, "trial_{}".format(trial.number), "{epoch}"), monitor="val_acc"
    )

    trainer = pl.Trainer(
        logger=False,
        limit_val_batches=PERCENT_VALID_EXAMPLES,
        checkpoint_callback=checkpoint_callback,
        max_epochs=EPOCHS,
        gpus=1 if torch.cuda.is_available() else None,
        # PyTorchLightningPruningCallback takes only (trial, monitor);
        # it has no `mode` argument.
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="val_acc")],
    )

    model = LightningNet(trial)
    trainer.fit(model)

    return trainer.callback_metrics["val_acc"].item()
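# For reference, the pruning callback above is roughly equivalent to doing the
# following by hand at the end of each validation epoch (a sketch of the
# mechanism, not Optuna's actual implementation):
import optuna

def report_and_maybe_prune(trial, trainer, monitor="val_acc"):
    current_score = trainer.callback_metrics.get(monitor)
    if current_score is not None:
        trial.report(current_score.item(), step=trainer.current_epoch)
        if trial.should_prune():
            raise optuna.TrialPruned(f"Trial pruned at epoch {trainer.current_epoch}.")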
def objective(trial):
    model = BaseMLPModel(
        trial=trial,
        hparams=hparams,
        input_size=sample_size * len(train_features),
        sample_size=sample_size,
        output_size=output_size,
        station_name=station_name,
        target=target,
        features=train_features,
        features_periodic=train_features_periodic,
        features_nonperiodic=train_features_nonperiodic,
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        test_dataset=test_dataset,
        scaler_X=train_valid_dataset.scaler_X,
        scaler_Y=train_valid_dataset.scaler_Y,
        output_dir=output_dir,
    )

    # most basic trainer, uses good defaults
    trainer = Trainer(
        gpus=1 if torch.cuda.is_available() else None,
        precision=32,
        min_epochs=1,
        max_epochs=20,
        default_root_dir=output_dir,
        fast_dev_run=fast_dev_run,
        logger=True,
        checkpoint_callback=False,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="valid/MSE")],
    )
    trainer.fit(model)

    # Don't log
    # hyperparameters = model.hparams
    # trainer.logger.log_hyperparams(hyperparameters)

    return trainer.callback_metrics.get("valid/MSE")