def on_epoch_start(self, runner: IRunner) -> None:
    """
    Set loaders for the current epoch.

    If validation is not required, then the first loader
    from the loaders used in the current epoch will be used
    as the validation loader. Metrics from the latest epoch
    with a true validation loader will be used in the epochs
    where this loader is missing.

    Args:
        runner (IRunner): current runner

    Raises:
        ValueError: if there are no loaders in the epoch
    """
    epoch_num = runner.epoch
    # loaders to use in the current epoch
    epoch_loaders = OrderedDict()
    for name, loader in self.loaders.items():
        period = self.loader_periods.get(name, 1)
        # ignore loaders where period == 0
        if period > 0 and epoch_num % period == 0:
            epoch_loaders[name] = loader
    if len(epoch_loaders) == 0:
        raise ValueError(f"There are no loaders in epoch {epoch_num}!")
    first_loader = next(iter(epoch_loaders.keys()))
    runner.valid_loader = (
        self.valid_loader
        if self.valid_loader in epoch_loaders
        else first_loader
    )
    runner.loaders = epoch_loaders
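# A minimal, self-contained sketch of the period logic above (the loader
# names and periods are illustrative, not part of the library API): a
# loader runs on epoch N when its period is positive and divides N.
from collections import OrderedDict

loaders = OrderedDict(train=..., train_additional=..., valid=...)
loader_periods = {"train_additional": 2, "valid": 3}

def loaders_for_epoch(epoch_num):
    return OrderedDict(
        (name, loader)
        for name, loader in loaders.items()
        if loader_periods.get(name, 1) > 0
        and epoch_num % loader_periods.get(name, 1) == 0
    )

# epoch 1 -> ["train"]; epoch 2 -> ["train", "train_additional"];
# epoch 6 -> all three loaders, so "valid" serves as the validation loader.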
def _exit_profiler(self, runner: IRunner) -> None:
    """Stop the profiler, export its traces/stacks, and log the stats table."""
    loader_key = runner.loader_key
    epoch = runner.stage_epoch_step
    if not self._should_use_profiler(loader_key, epoch) or self.profiler is None:
        return

    if self.stats is None:
        self.profiler.__exit__(None, None, None)
        if (
            "on_trace_ready" not in self.profiler_kwargs
            and self.export_chrome_trace_path
        ):
            self.profiler.export_chrome_trace(self.export_chrome_trace_path)
        if self.export_stacks_kwargs is not None:
            self.profiler.export_stacks(**self.export_stacks_kwargs)
        self.stats = self.profiler.key_averages()
        table_txt = self.stats.table(sort_by="cpu_time_total")  # row_limit=100
        with TemporaryDirectory() as tmp_dir:
            artifact_path = os.path.join(tmp_dir, "profiler_table.txt")
            with open(artifact_path, "w") as f:
                f.write(table_txt)
            runner.log_artifact(
                tag="profiler",
                artifact="profiler.txt",
                path_to_artifact=artifact_path,
            )
        print(table_txt)
def on_epoch_end(self, runner: IRunner) -> None:
    """Store validation metrics and update the best validation score.

    Args:
        runner (IRunner): current runner
    """
    if runner.stage_name.startswith("infer"):
        return

    runner.valid_metrics = {
        k.replace(f"{runner.valid_loader}_", ""): v
        for k, v in runner.epoch_metrics.items()
        if k.startswith(runner.valid_loader)
    }
    assert (
        runner.main_metric in runner.valid_metrics
    ), f"{runner.main_metric} value is not available by the epoch end"

    current_valid_metric = runner.valid_metrics[runner.main_metric]
    if runner.minimize_metric:
        best_valid_metric = runner.best_valid_metrics.get(
            runner.main_metric, float("+inf")
        )
        is_best = current_valid_metric < best_valid_metric
    else:
        best_valid_metric = runner.best_valid_metrics.get(
            runner.main_metric, float("-inf")
        )
        is_best = current_valid_metric > best_valid_metric

    if is_best:
        runner.is_best_valid = True
        runner.best_valid_metrics = runner.valid_metrics.copy()
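# A self-contained sketch of the "is best" rule above (names illustrative):
# when ``minimize_metric`` is set, lower is better and the missing-value
# sentinel is +inf; otherwise higher is better and the sentinel is -inf.
def is_best(current, best_so_far=None, minimize=True):
    if best_so_far is None:
        best_so_far = float("+inf") if minimize else float("-inf")
    return current < best_so_far if minimize else current > best_so_far

assert is_best(0.25)                        # any real loss beats the sentinel
assert not is_best(0.50, best_so_far=0.25)  # a higher loss is not an improvement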
def on_stage_start(self, runner: IRunner) -> None:
    """Setup model for stage.

    .. note::

        If CheckpointCallback initialized with ``resume``
        (as path to checkpoint file) or ``resume`` (as filename)
        and ``resume_dir`` (as directory with file),
        then the checkpoint will be loaded.

    Args:
        runner (IRunner): current runner
    """
    if runner.device.type == "xla":
        from torch_xla.core.xla_model import save
    else:
        from torch import save
    self._save_fn = save

    if getattr(runner, "resume", None) is not None:
        self.resume = runner.resume
        runner.resume = None
    elif getattr(runner, "autoresume", None) is not None:
        self.resume_dir = runner.logdir / "checkpoints"
        self.resume = f"{runner.autoresume}_full.pth"
        runner.autoresume = None

    for key in self._keys_from_state:
        value = getattr(runner, key, None)
        if value is not None:
            setattr(self, key, value)

    if self.resume_dir is not None:
        self.resume = str(self.resume_dir) + "/" + str(self.resume)

    if self.resume is not None:
        self._load_runner(runner, mapping=self.resume, load_full=True)
        self.resume = None
    else:
        checkpoint_exists = False
        need_load_full = False
        if isinstance(self.load_on_stage_start, str):
            checkpoint_exists = os.path.isfile(
                "{}/checkpoints/{}.pth".format(
                    runner.logdir, self.load_on_stage_start
                )
            )
            need_load_full = self.load_on_stage_start.endswith("full")
        elif isinstance(self.load_on_stage_start, dict):
            required_files = _required_files(
                runner.logdir, self.load_on_stage_start
            ).keys()
            checkpoint_exists = all(
                os.path.isfile(file) for file in required_files
            )

        if self.load_on_stage_start is not None and checkpoint_exists:
            self._load_runner(
                runner,
                mapping=self.load_on_stage_start,
                load_full=need_load_full,
            )
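# For example (paths illustrative): with resume_dir="logs/checkpoints" and
# resume="best_full.pth", the callback above resolves and loads the full
# checkpoint "logs/checkpoints/best_full.pth" at stage start.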
def on_epoch_start(self, runner: IRunner) -> None:
    """Epoch start hook.

    Args:
        runner (IRunner): current runner
    """
    # defaultdict(None) has no default_factory, i.e. it acts as a plain dict
    runner.valid_metrics = defaultdict(None)
    runner.is_best_valid = False
def _load_checkpoint(*, filename, runner: IRunner, load_full: bool = True) -> None:
    """
    Load checkpoint from a file.

    Args:
        filename (str): path to checkpoint
        runner (IRunner): current runner
        load_full (bool): if true (default), then states for criterion,
            optimizer and scheduler will be loaded as well. The file
            should contain the keys required for loading the model
            (``'model_state_dict'``), criterion (``'criterion_state_dict'``)
            (only for full load), optimizer (``'optimizer_state_dict'``),
            and scheduler (``'scheduler_state_dict'``).

    Raises:
        FileNotFoundError: when the file specified in ``filename``
            does not exist
    """
    if not os.path.isfile(filename):
        raise FileNotFoundError(f"No checkpoint found at {filename}!")

    print(f"=> Loading checkpoint {filename}")
    checkpoint = utils.load_checkpoint(filename)

    if not runner.stage_name.startswith("infer") and load_full:
        runner.stage_name = checkpoint["stage_name"]
        runner.epoch = checkpoint["epoch"]
        runner.global_epoch = checkpoint["global_epoch"]
        # @TODO: should we also load
        # checkpoint_data, main_metric, minimize_metric, valid_loader?
        # epoch_metrics, valid_metrics?

    if load_full:
        utils.unpack_checkpoint(
            checkpoint,
            model=runner.model,
            criterion=runner.criterion,
            optimizer=runner.optimizer,
            scheduler=runner.scheduler,
        )
        print(
            f"loaded state checkpoint {filename} "
            f"(global epoch {checkpoint['global_epoch']}, "
            f"epoch {checkpoint['epoch']}, "
            f"stage {checkpoint['stage_name']})"
        )
    else:
        utils.unpack_checkpoint(
            checkpoint,
            model=runner.model,
        )
        print(f"loaded model checkpoint {filename}")
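# A sketch of the checkpoint layout the loader above expects; the key names
# come from the docstring, while the model/optimizer objects are illustrative.
import torch
from torch import nn, optim

model = nn.Linear(4, 2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-2)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

checkpoint = {
    "model_state_dict": model.state_dict(),
    "criterion_state_dict": criterion.state_dict(),  # used on full load only
    "optimizer_state_dict": optimizer.state_dict(),  # used on full load only
    "scheduler_state_dict": scheduler.state_dict(),  # used on full load only
    "stage_name": "train",
    "epoch": 10,
    "global_epoch": 10,
}
torch.save(checkpoint, "checkpoint_full.pth")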
def on_epoch_end(self, runner: IRunner) -> None:
    """Store validation metrics and use the latest validation score
    when the validation loader is not required.

    Args:
        runner (IRunner): current runner
    """
    if self.valid_loader in runner.loaders:
        self.valid_metrics = runner.valid_metrics.copy()
    elif self.valid_metrics is not None:
        # use the previous score on validation
        runner.valid_metrics = self.valid_metrics
        runner.is_best_valid = False
def update_optimizer(self, runner: IRunner) -> None:
    """Update learning rate and momentum in runner.

    Args:
        runner: current runner
    """
    lr, momentum = self._update_optimizer(optimizer=self._optimizer)

    if self.optimizer_key is not None:
        runner.batch_metrics[f"lr_{self.optimizer_key}"] = lr
        runner.batch_metrics[f"momentum_{self.optimizer_key}"] = momentum
    else:
        runner.batch_metrics["lr"] = lr
        runner.batch_metrics["momentum"] = momentum
def on_epoch_end(self, runner: IRunner) -> None:
    """On epoch end event.

    Args:
        runner: current runner
    """
    lr = self._optimizer.param_groups[0]["lr"]
    lr_name = (
        f"lr/{self.optimizer_key}"
        if self.optimizer_key is not None
        else "lr"
    )
    runner.epoch_metrics[lr_name] = lr

    momentum = utils.get_optimizer_momentum(self._optimizer)
    if momentum is not None:
        momentum_name = (
            f"momentum/{self.optimizer_key}"
            if self.optimizer_key is not None
            else "momentum"
        )
        runner.epoch_metrics[momentum_name] = momentum
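# ``utils.get_optimizer_momentum`` is used above but not shown here; a
# minimal sketch of the usual lookup (an assumption, not the library code):
def get_optimizer_momentum(optimizer):
    """Return the momentum of the first param group, or None."""
    group = optimizer.param_groups[0]
    if "betas" in group:  # Adam-style optimizers store (beta1, beta2)
        return group["betas"][0]
    return group.get("momentum", None)  # SGD/RMSprop-style optimizers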
def on_epoch_start(self, runner: IRunner) -> None:
    """Epoch start hook.

    Args:
        runner (IRunner): current runner
    """
    runner.epoch_metrics = defaultdict(None)
def on_stage_start(self, runner: IRunner) -> None:
    """Stage start hook.

    Args:
        runner: current runner
    """
    self.reduced_metric = self.reduced_metric or runner.main_metric

    scheduler = runner.get_attr(
        key="scheduler", inner_key=self.scheduler_key
    )
    assert scheduler is not None
    self._scheduler = scheduler

    if self.mode is None:
        if isinstance(scheduler, BatchScheduler):
            self.mode = "batch"
        else:
            self.mode = "epoch"

    if (
        isinstance(scheduler, OneCycleLRWithWarmup)
        and self.mode == "batch"
    ):
        scheduler.reset()
    assert self.mode is not None
def on_batch_start(self, runner: IRunner) -> None:
    """Batch start hook.

    Args:
        runner: current runner
    """
    runner.batch_metrics = defaultdict(None)
def on_stage_start(self, runner: IRunner):
    """Checks that the current stage has correct criterion."""
    criterion = runner.get_attr(
        key="criterion", inner_key=self.criterion_key
    )
    assert criterion is not None
    self._criterion = criterion
def on_batch_start(self, runner: IRunner) -> None:
    """Mix data according to the Cutmix algorithm.

    Args:
        runner: current runner
    """
    if not self.is_needed:
        return

    if self.alpha > 0:
        self.lam = np.random.beta(self.alpha, self.alpha)
    else:
        self.lam = 1

    self.index = torch.randperm(runner.input[self.fields[0]].shape[0])
    # ``Tensor.to`` is not in-place, so the result must be reassigned
    self.index = self.index.to(runner.device)

    bbx1, bby1, bbx2, bby2 = self._rand_bbox(
        runner.input[self.fields[0]].shape, self.lam
    )
    for f in self.fields:
        runner.input[f][:, :, bbx1:bbx2, bby1:bby2] = runner.input[f][
            self.index, :, bbx1:bbx2, bby1:bby2
        ]
    self.lam = 1 - (
        (bbx2 - bbx1)
        * (bby2 - bby1)
        / (
            runner.input[self.fields[0]].shape[-1]
            * runner.input[self.fields[0]].shape[-2]
        )
    )
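# ``_rand_bbox`` is used above but not defined here; a minimal sketch
# following the reference CutMix implementation (an assumption, not the
# library code), where ``shape`` is an NCHW tensor shape and the sampled
# box covers roughly ``1 - lam`` of the image area:
import numpy as np

def _rand_bbox(shape, lam):
    dim2, dim3 = shape[2], shape[3]
    cut_ratio = np.sqrt(1.0 - lam)
    cut2, cut3 = int(dim2 * cut_ratio), int(dim3 * cut_ratio)
    # sample the box center uniformly, then clip the box to the image
    c2, c3 = np.random.randint(dim2), np.random.randint(dim3)
    bbx1 = np.clip(c2 - cut2 // 2, 0, dim2)
    bby1 = np.clip(c3 - cut3 // 2, 0, dim3)
    bbx2 = np.clip(c2 + cut2 // 2, 0, dim2)
    bby2 = np.clip(c3 + cut3 // 2, 0, dim3)
    return bbx1, bby1, bbx2, bby2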
def on_loader_start(self, runner: IRunner) -> None:
    """Loader start hook.

    Args:
        runner (IRunner): current runner
    """
    runner.loader_metrics = defaultdict(None)
    self.meters = defaultdict(meters.AverageValueMeter)
def on_epoch_end(self, runner: IRunner):
    """Check if the specified number of epochs has been iterated.

    Args:
        runner: current runner
    """
    if runner.epoch >= self.num_epoch_steps:
        runner.need_early_stop = True
def on_batch_end(self, runner: IRunner) -> None:
    """Batch end hook.

    Args:
        runner (IRunner): current runner
    """
    runner.batch_metrics = self._process_metrics(runner.batch_metrics)
    for key, value in runner.batch_metrics.items():
        self.meters[key].add(value, runner.batch_size)
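# A minimal sketch of the running average each meter above maintains
# (illustrative, not the library's AverageValueMeter): batch values are
# weighted by batch size, so ``mean`` is a per-sample average.
class AverageMeterSketch:
    def __init__(self):
        self.sum, self.n = 0.0, 0

    def add(self, value, n=1):
        self.sum += value * n
        self.n += n

    @property
    def mean(self):
        return self.sum / max(self.n, 1)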
def step_batch(self, runner: IRunner) -> None:
    """Update learning rate and momentum in runner.

    Args:
        runner (IRunner): current runner
    """
    lr, momentum = self._scheduler_step(scheduler=self._scheduler)

    if self.scheduler_key is not None:
        runner.batch_metrics[f"lr/{self.scheduler_key}"] = lr
        if momentum is not None:
            runner.batch_metrics[f"momentum/{self.scheduler_key}"] = momentum
    else:
        runner.batch_metrics["lr"] = lr
        if momentum is not None:
            runner.batch_metrics["momentum"] = momentum
def on_batch_end(self, runner: IRunner):
    """Check if the specified number of batches has been iterated.

    Args:
        runner: current runner
    """
    if runner.loader_batch_step >= self.num_batch_steps:
        runner.need_early_stop = True
def __init__(
    self,
    input_key: str = "features",
    target_key: str = "target",
    loss_key: str = "loss",
    augemention_prefix: str = "augment",
    projection_prefix: str = "projection",
    embedding_prefix: str = "embedding",
):
    """Init."""
    IRunner.__init__(self)
    self._input_key = input_key
    self._target_key = target_key
    self._loss_key = loss_key
    self._augemention_prefix = augemention_prefix
    self._projection_prefix = projection_prefix
    self._embedding_prefix = embedding_prefix
def on_stage_start(self, runner: IRunner) -> None:
    """Checks that the current stage has correct optimizer.

    Args:
        runner (IRunner): current runner
    """
    self._optimizer = runner.get_attr(
        key="optimizer", inner_key=self.optimizer_key
    )
    assert self._optimizer is not None
def on_exception(self, runner: IRunner):
    """Called if an Exception was raised."""
    exception = runner.exception
    if not utils.is_exception(exception):
        return

    if isinstance(exception, KeyboardInterrupt):
        self.tqdm.write("Early exiting")
        runner.need_exception_reraise = False
def on_loader_end(self, runner: IRunner) -> None:
    """Loader end hook.

    Args:
        runner (IRunner): current runner
    """
    for key, meter in self.meters.items():
        runner.loader_metrics[key] = meter.mean
    for key, value in runner.loader_metrics.items():
        runner.epoch_metrics[f"{runner.loader_name}_{key}"] = value
def step_epoch(self, runner: IRunner) -> None:
    """Update learning rate and momentum in runner.

    Args:
        runner: current runner
    """
    reduced_metric = runner.valid_metrics[self.reduced_metric]
    lr, momentum = self._scheduler_step(
        scheduler=self._scheduler, reduced_metric=reduced_metric
    )

    if self.scheduler_key is not None:
        runner.epoch_metrics[f"lr/{self.scheduler_key}"] = lr
        if momentum is not None:
            runner.epoch_metrics[f"momentum/{self.scheduler_key}"] = momentum
    else:
        runner.epoch_metrics["lr"] = lr
        if momentum is not None:
            runner.epoch_metrics["momentum"] = momentum
def on_epoch_end(self, runner: IRunner) -> None:
    """On epoch end event.

    Args:
        runner (IRunner): current runner
    """
    if self.decouple_weight_decay:
        # restore the stored per-group weight decay values
        for i, wd in enumerate(self._optimizer_wd):
            self._optimizer.param_groups[i]["weight_decay"] = wd

    lr = self._optimizer.param_groups[0]["lr"]
    lr_name = (
        f"lr/{self.optimizer_key}"
        if self.optimizer_key is not None
        else "lr"
    )
    runner.epoch_metrics[lr_name] = lr

    momentum = utils.get_optimizer_momentum(self._optimizer)
    if momentum is not None:
        momentum_name = (
            f"momentum/{self.optimizer_key}"
            if self.optimizer_key is not None
            else "momentum"
        )
        runner.epoch_metrics[momentum_name] = momentum
def on_stage_start(self, runner: IRunner) -> None:
    """Stage start hook.

    Args:
        runner (IRunner): current runner
    """
    optimizer = runner.get_attr(
        key="optimizer", inner_key=self.optimizer_key
    )
    assert optimizer is not None
    self._optimizer = optimizer
    self.init_lr = optimizer.defaults["lr"]
def on_epoch_end(self, runner: IRunner) -> None:
    """Check if the validation metric should be dropped for the current epoch.

    Args:
        runner (IRunner): current runner
    """
    valid_metric_name = f"{runner.valid_loader}_{runner.main_metric}"
    if self.valid_loader not in runner.loaders:
        # the sentinel value is never "best", so skipped epochs
        # cannot win over epochs with a real validation run
        runner.epoch_metrics[valid_metric_name] = (
            float("+inf") if runner.minimize_metric else float("-inf")
        )