def save_best_model_by_val_score(output_path, evaluator, model, metric_name, n_saved=3, trainer=None, tag="val"):
    """Method adds a handler to `evaluator` to save best models based on the score (named by `metric_name`)
    provided by `evaluator`.

    Args:
        output_path (str): output path to indicate where to save best models
        evaluator (Engine): evaluation engine used to provide the score
        model (nn.Module): model to store
        metric_name (str): metric name to use for score evaluation. This metric should be present in
            `evaluator.state.metrics`.
        n_saved (int, optional): number of best models to store
        trainer (Engine, optional): trainer engine to fetch the epoch when saving the best model.
        tag (str, optional): score name prefix: `{tag}_{metric_name}`. By default, tag is "val".
    """
    global_step_transform = None
    if trainer is not None:
        global_step_transform = global_step_from_engine(trainer)

    best_model_handler = ModelCheckpoint(
        dirname=output_path,
        filename_prefix="best",
        n_saved=n_saved,
        global_step_transform=global_step_transform,
        score_name="{}_{}".format(tag, metric_name.lower()),
        score_function=get_default_score_fn(metric_name),
    )
    evaluator.add_event_handler(Events.COMPLETED, best_model_handler, {"model": model})
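# Hedged usage sketch for save_best_model_by_val_score above: a toy model,
# trainer, and evaluator wired so that the three checkpoints with the highest
# "val_accuracy" are kept. Everything below except the final call is an
# illustrative assumption, not part of the original snippet.
import torch
import torch.nn as nn
from ignite.engine import create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy

model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
trainer = create_supervised_trainer(model, optimizer, nn.CrossEntropyLoss())
evaluator = create_supervised_evaluator(model, metrics={"accuracy": Accuracy()})

save_best_model_by_val_score("/tmp/checkpoints", evaluator, model, metric_name="accuracy", trainer=trainer)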
# Assumes module-level `from datetime import datetime`, `from pathlib import Path`,
# and ignite's `global_step_from_engine`.
def __init__(self, trainer, evaluator, res_dir='results', **kwargs):
    self.trainer = trainer
    self.evaluator = evaluator
    self.step_func = global_step_from_engine(trainer)
    # Timestamp used both as the results sub-directory and as a filename prefix.
    self.start_datetime = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # Wrap in Path so the plain-string default also supports the `/` operator.
    self.res_dir = Path(res_dir) / self.start_datetime
    self.prefix = f"{self.start_datetime}"
    self.res_dir.mkdir(parents=True)
def gen_save_best_models_by_val_score(
    save_handler: Union[Callable, BaseSaveHandler],
    evaluator: Engine,
    models: Union[torch.nn.Module, Dict[str, torch.nn.Module]],
    metric_name: str,
    n_saved: int = 3,
    trainer: Optional[Engine] = None,
    tag: str = "val",
    **kwargs: Any,
) -> Checkpoint:
    """Method adds a handler to ``evaluator`` to save ``n_saved`` of best models based on the metric
    (named by ``metric_name``) provided by ``evaluator`` (i.e. ``evaluator.state.metrics[metric_name]``).
    Models with the highest metric value will be retained. The logic of how to store objects is delegated to
    ``save_handler``.

    Args:
        save_handler (callable or :class:`~ignite.handlers.checkpoint.BaseSaveHandler`): Method or callable class to
            use to save engine and other provided objects. Function receives two objects: checkpoint as a dictionary
            and filename. If ``save_handler`` is a callable class, it can inherit from
            :class:`~ignite.handlers.checkpoint.BaseSaveHandler` and optionally implement a ``remove`` method to keep
            a fixed number of saved checkpoints. If the user needs to save the engine's checkpoints on disk,
            ``save_handler`` can be defined with :class:`~ignite.handlers.DiskSaver`.
        evaluator (Engine): evaluation engine used to provide the score
        models (nn.Module or Mapping): model or dictionary with the objects to save. Objects should have implemented
            ``state_dict`` and ``load_state_dict`` methods.
        metric_name (str): metric name to use for score evaluation. This metric should be present in
            ``evaluator.state.metrics``.
        n_saved (int, optional): number of best models to store
        trainer (Engine, optional): trainer engine to fetch the epoch when saving the best model.
        tag (str, optional): score name prefix: ``{tag}_{metric_name}``. By default, tag is "val".
        **kwargs: optional keyword args to be passed to construct
            :class:`~ignite.handlers.checkpoint.Checkpoint`.

    Returns:
        A :class:`~ignite.handlers.Checkpoint` handler.
    """
    global_step_transform = None
    if trainer is not None:
        global_step_transform = global_step_from_engine(trainer)

    if isinstance(models, nn.Module):
        to_save = {"model": models}  # type: Dict[str, nn.Module]
    else:
        to_save = models

    best_model_handler = Checkpoint(
        to_save,
        save_handler,
        filename_prefix="best",
        n_saved=n_saved,
        global_step_transform=global_step_transform,
        score_name="{}_{}".format(tag, metric_name.lower()),
        score_function=get_default_score_fn(metric_name),
        **kwargs,
    )
    evaluator.add_event_handler(Events.COMPLETED, best_model_handler)

    return best_model_handler
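# Hedged usage sketch for gen_save_best_models_by_val_score: keep the two best
# checkpoints on disk via DiskSaver (ignite's stock BaseSaveHandler for local
# storage). Reuses the toy model/trainer/evaluator from the sketch further up;
# names and paths are illustrative.
from ignite.handlers import DiskSaver

best_ckpt = gen_save_best_models_by_val_score(
    save_handler=DiskSaver("/tmp/best_models", create_dir=True, require_empty=False),
    evaluator=evaluator,
    models={"model": model},  # anything exposing state_dict/load_state_dict
    metric_name="accuracy",
    n_saved=2,
    trainer=trainer,  # the trainer epoch stamps the checkpoint filename
)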
def _setup_logging(
    logger: BaseLogger,
    trainer: Engine,
    optimizers: Optional[Union[Optimizer, Dict[str, Optimizer], Dict[None, Optimizer]]],
    evaluators: Optional[Union[Engine, Dict[str, Engine]]],
    log_every_iters: int,
) -> None:
    if optimizers is not None:
        if not isinstance(optimizers, (Optimizer, Mapping)):
            raise TypeError("Argument optimizers should be either a single optimizer or a dictionary of optimizers")

    if evaluators is not None:
        if not isinstance(evaluators, (Engine, Mapping)):
            raise TypeError("Argument evaluators should be either a single engine or a dictionary of engines")

    if log_every_iters is None:
        log_every_iters = 1

    logger.attach_output_handler(
        trainer, event_name=Events.ITERATION_COMPLETED(every=log_every_iters), tag="training", metric_names="all"
    )

    if optimizers is not None:
        # Log optimizer parameters
        if isinstance(optimizers, Optimizer):
            optimizers = {None: optimizers}

        for k, optimizer in optimizers.items():
            logger.attach_opt_params_handler(
                trainer, Events.ITERATION_STARTED(every=log_every_iters), optimizer, param_name="lr", tag=k
            )

    if evaluators is not None:
        # Log evaluation metrics
        if isinstance(evaluators, Engine):
            evaluators = {"validation": evaluators}

        event_name = Events.ITERATION_COMPLETED if isinstance(logger, WandBLogger) else None
        gst = global_step_from_engine(trainer, custom_event_name=event_name)
        for k, evaluator in evaluators.items():
            logger.attach_output_handler(
                evaluator, event_name=Events.COMPLETED, tag=k, metric_names="all", global_step_transform=gst
            )
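# Hedged sketch of wiring _setup_logging (a private helper) to a concrete
# logger. TensorboardLogger is an assumption; any BaseLogger exposing
# attach_output_handler / attach_opt_params_handler fits. trainer, optimizer,
# and evaluator are the toy objects from the first sketch.
from ignite.contrib.handlers import TensorboardLogger

tb_logger = TensorboardLogger(log_dir="/tmp/tb_logs")
_setup_logging(
    logger=tb_logger,
    trainer=trainer,
    optimizers=optimizer,                  # a single Optimizer; a dict of them also works
    evaluators={"validation": evaluator},  # tag -> evaluation Engine
    log_every_iters=100,
)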
def setup_any_logging(logger, logger_module, trainer, optimizers, evaluators, log_every_iters):
    if optimizers is not None:
        from torch.optim.optimizer import Optimizer

        if not isinstance(optimizers, (Optimizer, Mapping)):
            raise TypeError("Argument optimizers should be either a single optimizer or a dictionary of optimizers")

    if evaluators is not None:
        if not isinstance(evaluators, (Engine, Mapping)):
            raise TypeError("Argument evaluators should be either a single engine or a dictionary of engines")

    if log_every_iters is None:
        log_every_iters = 1

    logger.attach(
        trainer,
        log_handler=logger_module.OutputHandler(tag="training", metric_names="all"),
        event_name=Events.ITERATION_COMPLETED(every=log_every_iters),
    )

    if optimizers is not None:
        # Log optimizer parameters
        if isinstance(optimizers, Optimizer):
            optimizers = {None: optimizers}

        for k, optimizer in optimizers.items():
            logger.attach(
                trainer,
                log_handler=logger_module.OptimizerParamsHandler(optimizer, param_name="lr", tag=k),
                event_name=Events.ITERATION_STARTED(every=log_every_iters),
            )

    if evaluators is not None:
        # Log evaluation metrics
        if isinstance(evaluators, Engine):
            evaluators = {"validation": evaluators}

        for k, evaluator in evaluators.items():
            gst = global_step_from_engine(trainer)
            logger.attach(
                evaluator,
                log_handler=logger_module.OutputHandler(tag=k, metric_names="all", global_step_transform=gst),
                event_name=Events.COMPLETED,
            )
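# Hedged sketch for the legacy setup_any_logging variant, which takes the
# logger *module* so it can construct module-specific OutputHandler /
# OptimizerParamsHandler instances. The tensorboard_logger module is an
# assumption; any contrib logger module providing those two classes would do.
from ignite.contrib.handlers import tensorboard_logger as tb_module

setup_any_logging(
    tb_module.TensorboardLogger(log_dir="/tmp/tb_logs"),
    tb_module,
    trainer,
    optimizers=optimizer,
    evaluators=evaluator,  # a bare Engine is wrapped as {"validation": evaluator}
    log_every_iters=50,
)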
def test_global_step_from_engine():
    engine = Engine(lambda engine, batch: None)
    engine.state = State()
    engine.state.epoch = 1

    another_engine = Engine(lambda engine, batch: None)
    another_engine.state = State()
    another_engine.state.epoch = 10

    # The transform reports the step of `another_engine`, not of the engine it is called with.
    global_step_transform = global_step_from_engine(another_engine)
    res = global_step_transform(engine, Events.EPOCH_COMPLETED)

    assert res == another_engine.state.epoch
# Attach handler to plot trainer's loss every 100 iterations
tb_logger.attach_output_handler(
    trainer,
    event_name=Events.ITERATION_COMPLETED(every=100),
    tag="training",
    output_transform=lambda loss: {"batch_loss": loss},
)

# Attach handler for plotting both evaluators' metrics after every epoch completes
tb_logger.attach_output_handler(
    evaluator,
    event_name=Events.EPOCH_COMPLETED,
    tag="validation",
    metric_names="all",
    global_step_transform=global_step_from_engine(trainer),
)

# ### Executing the PyTorch-Ignite model training code
trainer.run(train_loader, max_epochs=5)
def attach_handlers(run, model, optimizer, trainer, train_evaluator, evaluator, train_loader, val_loader, params):
    # Tqdm logger
    pbar = ProgressBar(persist=True, bar_format=config.IGNITE_BAR_FORMAT)
    pbar.attach(trainer.engine, metric_names='all')
    tqdm_logger = TqdmLogger(pbar=pbar)
    # noinspection PyTypeChecker
    tqdm_logger.attach_output_handler(
        evaluator.engine,
        event_name=Events.COMPLETED,
        tag="validation",
        global_step_transform=global_step_from_engine(trainer.engine),
    )
    # noinspection PyTypeChecker
    tqdm_logger.attach_output_handler(
        train_evaluator.engine,
        event_name=Events.COMPLETED,
        tag="train",
        global_step_transform=global_step_from_engine(trainer.engine),
    )

    # Evaluators
    train_evaluator.attach(trainer.engine, Events.EPOCH_COMPLETED, train_loader)
    evaluator.attach(trainer.engine, Events.EPOCH_COMPLETED, data=val_loader)

    # Learning rate scheduling
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'max', verbose=True, patience=5, factor=0.5)
    evaluator.engine.add_event_handler(
        Events.COMPLETED,
        lambda engine: lr_scheduler.step(engine.state.metrics['accuracy']))

    # Early stopping
    es_handler = EarlyStopping(
        patience=15,
        score_function=lambda engine: engine.state.metrics['accuracy'],
        trainer=trainer.engine,
        cumulative_delta=True,
        min_delta=0.0001)
    if 'train_all' in params and params['train_all']:
        train_evaluator.engine.add_event_handler(Events.COMPLETED, es_handler)
    else:
        evaluator.engine.add_event_handler(Events.COMPLETED, es_handler)
    es_handler.logger.setLevel(logging.DEBUG)

    # Model checkpoints
    name = run.replace('/', '-')
    mc_handler = ModelCheckpoint(
        config.MODELS_DIR, name, n_saved=1, create_dir=True, require_empty=False,
        score_name='acc',
        score_function=lambda engine: engine.state.metrics['accuracy'],
        global_step_transform=global_step_from_engine(trainer.engine))
    evaluator.engine.add_event_handler(Events.EPOCH_COMPLETED, mc_handler, {'m': model})

    # TensorBoard logger
    tb_logger = TensorboardLogger(log_dir=os.path.join(config.TENSORBOARD_DIR, run))
    images, labels = next(iter(train_loader))
    tb_logger.writer.add_graph(copy.deepcopy(model).cpu(), images)
    tb_logger.writer.add_hparams(params, {'hparam/dummy': 0})
    # noinspection PyTypeChecker
    tb_logger.attach_output_handler(
        train_evaluator.engine,
        event_name=Events.COMPLETED,
        tag="train",
        metric_names="all",
        global_step_transform=global_step_from_engine(trainer.engine),
    )
    # noinspection PyTypeChecker
    tb_logger.attach_output_handler(
        evaluator.engine,
        event_name=Events.COMPLETED,
        tag="validation",
        metric_names="all",
        global_step_transform=global_step_from_engine(trainer.engine),
    )
    input_shape = tuple(next(iter(train_loader))[0].shape[1:])
    tb_logger.attach(trainer.engine,
                     log_handler=WeightsImageHandler(model, input_shape),
                     event_name=Events.EPOCH_COMPLETED)
    tb_logger.attach(trainer.engine,
                     log_handler=OptimizerParamsHandler(optimizer),
                     event_name=Events.EPOCH_STARTED)
    # tb_logger.attach(trainer.engine, log_handler=WeightsScalarHandler(model), event_name=Events.EPOCH_COMPLETED)
    # tb_logger.attach(trainer.engine, log_handler=WeightsHistHandler(model), event_name=Events.EPOCH_COMPLETED)
    # tb_logger.attach(trainer.engine,
    #                  log_handler=ActivationsHistHandler(model, layer_names=['linear1', 'batch_norm', 'repu']),
    #                  event_name=Events.ITERATION_COMPLETED)
    # tb_logger.attach(trainer.engine,
    #                  log_handler=NumActivationsScalarHandler(model, layer_names=['linear1', 'repu']),
    #                  event_name=Events.ITERATION_COMPLETED)
    # tb_logger.attach(trainer.engine,
    #                  log_handler=ActivationsScalarHandler(model, reduction=torch.mean,
    #                                                      layer_names=['linear1', 'batch_norm', 'repu']),
    #                  event_name=Events.ITERATION_COMPLETED)
    # tb_logger.attach(trainer.engine,
    #                  log_handler=ActivationsScalarHandler(model, reduction=torch.std,
    #                                                      layer_names=['linear1', 'batch_norm', 'repu']),
    #                  event_name=Events.ITERATION_COMPLETED)

    return es_handler, tb_logger