def wd(self, val: float) -> None:
    "Set weight decay."
    if not self.true_wd:
        # With true_wd, decay is applied manually in `step`, so only pass it
        # through to the optimizer's param groups when true_wd is off.
        self.set_val("weight_decay", listify(val, self._wd), bn_groups=self.bn_wd)
    self._wd = listify(val, self._wd)
def mom(self, val: float) -> None:
    "Set momentum."
    # SGD/RMSprop expose "momentum"; Adam-style optimizers fold it into "betas".
    if "momentum" in self.hyperparameters:
        self.set_val("momentum", listify(val, self._mom))
    elif "betas" in self.hyperparameters:
        self.set_val("betas", (listify(val, self._mom), self._beta))
    self._mom = listify(val, self._mom)
def beta(self, val: float) -> None:
    "Set beta (or alpha, as makes sense for the given optimizer)."
    if val is None:
        return
    if "betas" in self.hyperparameters:
        self.set_val("betas", (self._mom, listify(val, self._beta)))
    elif "alpha" in self.hyperparameters:
        self.set_val("alpha", listify(val, self._beta))
    self._beta = listify(val, self._beta)
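# Standalone illustration (not part of this module) of why `mom` and `beta`
# branch on the hyperparameter name: torch optimizers expose their
# momentum-like values under different param-group keys.
import torch
from torch import nn, optim

_p = [nn.Parameter(torch.zeros(1))]
assert "momentum" in optim.SGD(_p, lr=0.1, momentum=0.9).param_groups[0]
assert "betas" in optim.Adam(_p, lr=0.1).param_groups[0]
assert "alpha" in optim.RMSprop(_p, lr=0.1).param_groups[0]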
def __init__(
    self,
    data: DataBunch,
    model: nn.Module,
    loss_func: Callable,
    opt_func: Callable = AdamW,
    metrics: Collection[Callable] = None,
    true_weight_decay: bool = True,
    batch_norm_weight_decay: bool = True,
    weight_decay: Floats = 1e-2,
    train_bn: bool = True,
    path: str = ".",
    model_dir: str = "model",
    callback_fns: Collection[Callable] = None,
    callbacks: Collection[Callback] = [],
    layer_groups: Collection[nn.Module] = None,
):
    """
    :param data: object that wraps the data loaders
    :param model: the model
    :param loss_func: the loss function
    :param opt_func: the optimizer
    :param metrics: metrics to be calculated after each epoch
    :param true_weight_decay: whether to apply weight decay directly to the weights (decoupled, AdamW-style) rather than through the gradients
    :param batch_norm_weight_decay: whether weight decay is also applied to batch-norm parameters
    :param weight_decay: the weight decay
    :param train_bn: whether batch-norm layers stay in training mode even when their layer group is frozen
    :param path: the root path of the Learner
    :param model_dir: the name of the directory that the model will be saved in, where path is the parent
    :param callback_fns: the learner callback functions
    :param callbacks: the callbacks
    :param layer_groups: the different layers that the model is split into for differential learning rates
    """
    self.data = data
    self.model = model.to(self.data.device)
    self.loss_func = loss_func
    self.opt_func = opt_func
    self.true_weight_decay = true_weight_decay
    self.batch_norm_weight_decay = batch_norm_weight_decay
    self.weight_decay = weight_decay
    self.train_bn = train_bn
    self.path = Path(path)
    self.model_dir = model_dir
    self.metrics = listify(metrics)
    self.callbacks = listify(callbacks)
    self.callback_fns = [Recorder] + listify(callback_fns)
    if layer_groups is None:
        # Default: treat the whole flattened model as a single layer group.
        self.layer_groups = [nn.Sequential(*flatten_model(self.model))]
    else:
        self.layer_groups = layer_groups
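# Standalone sketch (not part of this module) of what layer_groups buy you:
# each group becomes its own param group, so earlier layers can train with a
# smaller learning rate than the head.
import torch.nn as nn
import torch.optim as optim

_body = nn.Sequential(nn.Linear(10, 20), nn.ReLU())
_head = nn.Linear(20, 2)
_opt = optim.AdamW([
    {"params": _body.parameters(), "lr": 1e-4},  # earlier layers: smaller lr
    {"params": _head.parameters(), "lr": 1e-3},  # head: larger lr
])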
def __init__(self, vals: StartOptEnd, n_iter: int, func: Optional[AnnealFunc] = None):
    if is_tuple(vals):
        # (start, end) pair: anneal from start to end.
        self.start, self.end = (vals[0], vals[1])
    elif is_listy(vals):
        # List of starts: ends default to zeros of matching length.
        self.start, self.end = vals, listify(0, vals)
    else:
        # Scalar start: end defaults to 0.
        self.start, self.end = vals, 0
    self.n_iter = max(1, n_iter)
    if func is None:
        # Tuples get a linear anneal by default; everything else stays constant.
        self.func = annealing_linear if is_tuple(vals) else annealing_no
    else:
        self.func = func
    self.n = 0
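# Minimal sketch of what a Stepper does with a (start, end) pair: call its
# anneal func with the fraction of iterations completed. Assumes the
# conventional signature annealing_linear(start, end, pct); the real function
# is defined elsewhere in this module.
def _annealing_linear_sketch(start, end, pct):
    return start + pct * (end - start)

for _n in range(1, 6):
    print(_annealing_linear_sketch(0.1, 0.01, _n / 5))  # 0.082, 0.064, ..., 0.01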
def step(self) -> None:
    "Set weight decay and step optimizer."
    # weight decay outside of optimizer step (AdamW)
    if self.true_wd:
        for lr, wd, pg1, pg2 in zip(
            self._lr,
            self._wd,
            self.non_batch_norm_param_groups,
            self.batch_norm_param_groups,
        ):
            for p in pg1["params"]:
                p.data.mul_(1 - wd * lr)
            if self.bn_wd:
                for p in pg2["params"]:
                    p.data.mul_(1 - wd * lr)
        # Zero the optimizer's own weight_decay so it is not applied twice.
        self.set_val("weight_decay", listify(0, self._wd))
    self.opt.step()
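# Standalone illustration (not part of this module) of the decoupled decay
# above: shrink the weights by (1 - wd * lr) before the gradient step, with
# the optimizer's own weight_decay held at 0.
import torch
from torch import nn, optim

_w = nn.Parameter(torch.ones(3))
_sgd = optim.SGD([_w], lr=0.1, weight_decay=0.0)
(_w ** 2).sum().backward()
with torch.no_grad():
    _w.mul_(1 - 1e-2 * 0.1)  # p.data.mul_(1 - wd * lr), as in `step`
_sgd.step()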
def __init__(
    self,
    learn,
    lr_max: Floats,
    moms: Floats = (0.95, 0.85),
    div_factor: float = 25.0,
    pct_start: float = 0.3,
):
    super().__init__()
    self.learn = learn
    self.lr_max = lr_max
    self.moms = tuple(listify(moms, 2))
    self.div_factor = div_factor
    self.pct_start = pct_start
    if is_listy(self.lr_max):
        self.lr_max = np.array(self.lr_max)
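# Rough standalone sketch (assumptions flagged inline) of the schedule these
# arguments describe: lr climbs from lr_max / div_factor to lr_max over the
# first pct_start of training, then anneals back down, while momentum moves
# from moms[0] to moms[1] and back. Exact anneal shapes and the final lr
# floor vary by implementation.
import numpy as np

def _one_cycle_lr_sketch(pct, lr_max=1e-2, div_factor=25.0, pct_start=0.3):
    low = lr_max / div_factor
    if pct < pct_start:  # warm-up phase (linear here, an assumption)
        return low + (pct / pct_start) * (lr_max - low)
    p = (pct - pct_start) / (1 - pct_start)  # cosine anneal back down
    return low + (lr_max - low) * (1 + np.cos(np.pi * p)) / 2

print([round(_one_cycle_lr_sketch(p), 5) for p in (0.0, 0.3, 1.0)])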
def create(
    cls,
    opt_func: Union[type, Callable],
    lr: Union[float, Tuple, List],
    layer_groups: ModuleList,
    wd: Floats = 0.0,
    true_wd: bool = False,
    bn_wd: bool = True,
) -> optim.Optimizer:
    "Create an `optim.Optimizer` from `opt_func` with `lr`. Set lr on `layer_groups`."
    split_groups = split_layers_into_batch_norm_and_non_batch_norm(layer_groups)
    parameter_dicts = cls._construct_initial_parameter_dicts_for_trainable_parameters(
        split_groups
    )
    opt = opt_func(parameter_dicts)
    opt = cls(opt, wd, true_wd, bn_wd)
    opt.lr = listify(lr, layer_groups)
    return opt
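# Standalone sketch (not this module's implementation) of the batch-norm
# split `create` relies on: put bn parameters in their own group so weight
# decay can be skipped on them when bn_wd is False.
import torch.nn as nn

def _split_bn_sketch(module: nn.Module):
    bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)
    bn, rest = [], []
    for m in module.modules():
        bucket = bn if isinstance(m, bn_types) else rest
        bucket += [p for p in m.parameters(recurse=False) if p.requires_grad]
    return rest, bn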
def fit(
    self,
    epochs: int,
    lr: Union[Floats, slice] = default_lr,
    wd: Floats = None,
    callbacks: Collection[Callback] = None,
) -> None:
    "Fit the model for `epochs` epochs with learning rate `lr`, weight decay `wd`, and `callbacks`."
    lr = self.lr_range(lr)
    if wd is None:
        wd = self.weight_decay
    self.create_opt(lr, wd)
    # Instantiate the callback classes with this learner, then add one-off callbacks.
    callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
    fit(
        epochs,
        self.model,
        self.loss_func,
        opt=self.opt,
        data=self.data,
        metrics=self.metrics,
        callbacks=self.callbacks + callbacks,
    )
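# Sketch of the slice form `lr` can take. Assumption (fastai-style): `lr_range`
# spreads rates geometrically across layer groups when the slice has a start,
# and otherwise gives the last group lr.stop and the rest lr.stop / 10.
import numpy as np

def _lr_range_sketch(lr, n_groups=3):
    if not isinstance(lr, slice):
        return lr
    if lr.start is not None:
        return np.geomspace(lr.start, lr.stop, n_groups)
    return np.array([lr.stop / 10] * (n_groups - 1) + [lr.stop])

print(_lr_range_sketch(slice(1e-4, 1e-2)))  # [1e-4, 1e-3, 1e-2]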
def lr(self, val: float) -> None:
    "Set learning rate."
    self._lr = self.set_val("lr", listify(val, self._lr))
def training_scheme_one_cycle(learner, lr, epochs, div_factor=25):
    "Unfreeze all layer groups and train `learner` with the one-cycle policy."
    lr = [float(lr_) for lr_ in listify(lr)]
    learner.unfreeze()
    learner.fit_one_cycle(cyc_len=epochs, max_lr=lr, div_factor=div_factor)
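# Hedged usage sketch, assuming `learner` is a Learner built from this module:
# training_scheme_one_cycle(learner, lr=1e-3, epochs=5)
# training_scheme_one_cycle(learner, lr=[1e-4, 1e-3], epochs=5, div_factor=10)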