def create_optimizer(self, conf_optim: Config, params) -> Optimizer:
    # in this case we don't need to differentiate between arch_params and
    # weights as the same optimizer will update both
    arch_params = list(self.model.all_owned().param_by_kind('alphas'))
    nonarch_params = list(self.model.nonarch_params(recurse=True))
    param_groups = [{'params': nonarch_params}, {'params': arch_params}]
    return ml_utils.create_optimizer(conf_optim, param_groups)
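# --- Added sketch (not part of the original file) ---------------------------
# Why param_groups: torch.optim accepts a list of dicts, one per group, so a
# single optimizer can update both weights and alphas while still allowing
# per-group overrides. A minimal, self-contained example in plain PyTorch;
# the names (net, arch_params) and hyperparameter values are illustrative
# stand-ins, not Archai's API or defaults.
import torch
import torch.nn as nn

net = nn.Linear(4, 2)                         # stand-in for the weights
arch_params = [nn.Parameter(torch.zeros(3))]  # stand-in for the alphas
param_groups = [
    {'params': net.parameters()},             # weights use the defaults below
    {'params': arch_params, 'lr': 3e-4},      # alphas override, e.g. the lr
]
optim = torch.optim.SGD(param_groups, lr=0.025, momentum=0.9)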
def __init__(self, conf_alpha_optim: Config, w_momentum: float, w_decay: float,
             model: Model, lossfn: _Loss) -> None:
    self._w_momentum = w_momentum  # momentum for w
    self._w_weight_decay = w_decay  # weight decay for w
    self._lossfn = lossfn
    self._model = model  # main model with respect to w and alpha
    self._alphas = list(_get_alphas(self._model))
    # optimizer that updates only the alpha parameters
    self._alpha_optim = ml_utils.create_optimizer(conf_alpha_optim, self._alphas)
def __init__(self, conf_alpha_optim: Config, w_momentum: float, w_decay: float,
             model: Model, lossfn: _Loss, device, batch_chunks: int) -> None:
    self._w_momentum = w_momentum  # momentum for w
    self._w_weight_decay = w_decay  # weight decay for w
    self._lossfn = lossfn
    self._model = model  # main model with respect to w and alpha
    self.batch_chunks = batch_chunks
    self.device = device

    # create a copy of the model which we will use to compute grads for
    # alphas without disturbing the original weights
    self._vmodel = copy.deepcopy(model).to(device)

    self._alphas = list(_get_alphas(self._model))
    self._valphas = list(_get_alphas(self._vmodel))

    # optimizer that updates only the alpha parameters
    self._alpha_optim = ml_utils.create_optimizer(conf_alpha_optim, self._alphas)
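# --- Added sketch (not part of the original file) ---------------------------
# Why the deep copy: in a DARTS-style unrolled step the alphas' gradient is
# taken at the "virtual" weights w' = w - lr * dL_train/dw, and writing w'
# into _vmodel leaves the real weights untouched. The helper below is a
# hedged, simplified sketch (it omits the momentum buffer the full method
# folds in); virtual_step and its parameters are illustrative, not the
# library's API.
import torch

def virtual_step(model, vmodel, lossfn, x, y, lr, weight_decay):
    # forward/backward on the ORIGINAL weights
    loss = lossfn(model(x), y)
    grads = torch.autograd.grad(loss, list(model.parameters()))
    # write w' = w - lr * (grad + weight_decay * w) into the copy
    with torch.no_grad():
        for w, vw, g in zip(model.parameters(), vmodel.parameters(), grads):
            vw.copy_(w - lr * (g + weight_decay * w))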
def create_optimizer(self, conf_optim: Config, params) -> Optimizer:
    # return optim that only operates on w, not alphas
    return ml_utils.create_optimizer(
        conf_optim, self.model.nonarch_params(recurse=True))
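# --- Added sketch (not part of the original file) ---------------------------
# The resulting split: the trainer's optimizer sees only the weights, while
# the separate alpha optimizer (created in __init__ above) sees only the
# alphas, so neither step() can move the other's parameters. Illustrative
# names and values in plain PyTorch, not the project's configuration:
import torch
import torch.nn as nn

net = nn.Linear(4, 2)                     # stand-in for the weights
alphas = [nn.Parameter(torch.zeros(3))]   # stand-in for the alphas
w_optim = torch.optim.SGD(net.parameters(), lr=0.025, momentum=0.9)
alpha_optim = torch.optim.Adam(alphas, lr=3e-4, betas=(0.5, 0.999))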
def create_optimizer(self, conf_optim: Config, params) -> Optimizer:
    optim = ml_utils.create_optimizer(conf_optim, params)
    return optim