def loss(self, data, output=None, model=None, mode=None):
    """Sample a subnet and compute its loss."""
    loss = super().loss(data, output, model, mode)
    # sample a new random subnet for the next forward pass
    params = ParamSpace().get_categorical_params(
        random.randint(0, self.space_size - 1))
    ParamSpace().update_params(params)
    return loss
def __call__(self, model):
    """Run Exporter."""
    if self.with_keys:
        params = dict(ParamSpace().named_param_values())
    else:
        params = [p.value() for p in ParamSpace().params()]
    if self.export_fmt:
        if self.with_keys:
            return self.export_fmt.format(**params)
        return self.export_fmt.format(*params)
    return params
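# Illustration of the export_fmt branch above, using plain str.format.
# The parameter names and format strings here are hypothetical examples,
# not values defined by the framework.
named = {'stage1': 3, 'stage2': 5}   # with_keys=True -> dict of named values
positional = [3, 5]                  # with_keys=False -> list of values only

print('stage1={stage1},stage2={stage2}'.format(**named))  # stage1=3,stage2=5
print('{}-{}'.format(*positional))                        # 3-5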
def step(self, estim):
    """Update Optimizer states using Estimator."""
    self.optim_reset()
    trn_batch = estim.get_cur_train_batch()
    val_batch = estim.get_next_valid_batch()
    lr = estim.trainer.get_lr()
    optimizer = estim.trainer.get_optimizer()
    model = estim.model
    if self.v_net is None:
        self.v_net = copy.deepcopy(model)
    # do virtual step (calc w`)
    self._virtual_step(trn_batch, lr, optimizer, estim)
    # calc unrolled loss
    loss = estim.loss(val_batch, model=self.v_net, mode='valid')  # L_val(w`)
    # compute gradient
    alphas = ParamSpace().tensor_values()
    v_alphas = tuple(alphas)
    v_weights = tuple(self.v_net.parameters())
    v_grads = torch.autograd.grad(loss, v_alphas + v_weights)
    dalpha = v_grads[:len(v_alphas)]
    dw = v_grads[len(v_alphas):]
    hessian = self._compute_hessian(dw, trn_batch, estim)
    # update final gradient = dalpha - lr*hessian
    with torch.no_grad():
        for a, da, h in zip(alphas, dalpha, hessian):
            a.grad = da - lr * h
    self.optim_step()
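# Standalone sketch (not framework code) of how the gradient split above works:
# torch.autograd.grad returns gradients in the same order as its inputs tuple,
# so one call over (alphas + weights) can be sliced back into the two groups.
# The tensors `a` and `w` are hypothetical stand-ins for an architecture
# parameter and a model weight.
import torch

a = torch.randn(3, requires_grad=True)   # stands in for an architecture param
w = torch.randn(3, requires_grad=True)   # stands in for a model weight
loss = (a * w).sum()

grads = torch.autograd.grad(loss, (a,) + (w,))
dalpha, dw = grads[:1], grads[1:]
print(dalpha[0])  # gradient w.r.t. a, equal to w's values
print(dw[0])      # gradient w.r.t. w, equal to a's values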
def _compute_hessian(self, dw, trn_batch, estim):
    """Compute Hessian matrix.

    dw = dw` { L_val(w`, alpha) }
    w+ = w + eps * dw
    w- = w - eps * dw
    hessian = (dalpha { L_trn(w+, alpha) } - dalpha { L_trn(w-, alpha) }) / (2*eps)
    eps = 0.01 / ||dw||
    """
    model = estim.model
    alphas = ParamSpace().tensor_values()
    norm = torch.cat([w.view(-1) for w in dw]).norm()
    eps = 0.01 / norm
    # w+ = w + eps*dw`
    with torch.no_grad():
        for p, d in zip(model.parameters(), dw):
            p += eps * d
    loss = estim.loss(trn_batch, mode='train')
    dalpha_pos = torch.autograd.grad(loss, alphas)  # dalpha { L_trn(w+) }
    # w- = w - eps*dw`
    with torch.no_grad():
        for p, d in zip(model.parameters(), dw):
            p -= 2. * eps * d
    loss = estim.loss(trn_batch, mode='train')
    dalpha_neg = torch.autograd.grad(loss, alphas)  # dalpha { L_trn(w-) }
    # recover w
    with torch.no_grad():
        for p, d in zip(model.parameters(), dw):
            p += eps * d
    # divide by (2*eps) as in the docstring; the previous form multiplied by eps
    hessian = [(p - n) / (2. * eps.item()) for p, n in zip(dalpha_pos, dalpha_neg)]
    return hessian
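# Standalone sanity check (toy example, not framework code) of the central
# difference in the docstring: (d_alpha L(w + eps*dw) - d_alpha L(w - eps*dw))
# / (2*eps) approximates the mixed Hessian-vector product (d^2 L / d_alpha d_w) @ dw.
# The loss L(w, alpha) = sum(alpha * w^2) is an arbitrary illustrative choice.
import torch

alpha = torch.randn(4, requires_grad=True)
w = torch.randn(4, requires_grad=True)
dw = torch.randn(4)
eps = 0.01 / dw.norm()

def dalpha(weights):
    # gradient of L(weights, alpha) with respect to alpha
    loss = (alpha * weights ** 2).sum()
    return torch.autograd.grad(loss, alpha)[0]

approx = (dalpha(w + eps * dw) - dalpha(w - eps * dw)) / (2 * eps)
exact = 2 * w * dw  # d^2 L / d_alpha d_w = diag(2w), applied to dw
print(torch.allclose(approx, exact, atol=1e-3))  # True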
def step(self, params):
    """Return evaluation results of a parameter set."""
    ParamSpace().update_params(params)
    arch_desc = self.get_arch_desc()
    ret = self.compute_metrics()
    self.logger.info('Evaluate: {} -> {}'.format(arch_desc, ret))
    return ret
def __call__(self, model):
    """Run constructor."""
    ParamSpace().reset()
    seed = self.seed
    if seed:
        np.random.seed(seed)
    return model
def step(self, params):
    """Return evaluation results of a parameter set."""
    ParamSpace().update_params(params)
    n_train_batch = self.get_num_train_batch()
    n_valid_batch = self.get_num_valid_batch()
    train_epochs = self.train_epochs
    train_steps = self.train_steps
    if train_steps == 0:
        train_steps = n_train_batch
    elif train_steps == -1:
        train_steps = max(round(n_train_batch / (n_valid_batch or 1)), 1)
    if self.reset_training:
        self.reset_trainer(epochs=train_epochs)
    for epoch in range(train_epochs):
        for _ in range(train_steps):
            self.cur_step += 1
            if self.cur_step >= n_train_batch:
                self.cur_step = -1
                break
            self.train_step(model=self.model,
                            epoch=epoch,
                            tot_epochs=train_epochs,
                            step=self.cur_step,
                            tot_steps=n_train_batch)
    if (self.cur_step + 1) % self.eval_steps != 0:
        return {'default': None}
    arch_desc = self.exporter(self.model)
    ret = self.compute_metrics()
    self.logger.info('Evaluate: {} -> {}'.format(arch_desc, ret))
    return ret
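# Toy illustration (not framework code) of how train_steps resolves above:
# 0 means one full pass over the training batches, and -1 appears to scale
# the step count by the train/valid batch ratio. Numbers are hypothetical.
def resolve_train_steps(train_steps, n_train_batch, n_valid_batch):
    if train_steps == 0:
        return n_train_batch
    if train_steps == -1:
        return max(round(n_train_batch / (n_valid_batch or 1)), 1)
    return train_steps

print(resolve_train_steps(0, 100, 10))   # 100
print(resolve_train_steps(-1, 100, 10))  # 10
print(resolve_train_steps(-1, 100, 0))   # 100
print(resolve_train_steps(5, 100, 10))   # 5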
def __call__(self, model):
    """Run constructor."""
    Slot.reset()
    ParamSpace().reset()
    seed = self.seed
    if seed:
        init_device(self.device, seed)
    configure_ops(self.ops_conf or {})
    return model
def get_score(self, params, scores):
    """Return score of given parameters."""
    score = 0
    for pn, v in params.items():
        p = ParamSpace().get_param(pn)
        idx = p.get_index(v)
        dim = len(p)
        if pn not in scores:
            if self.random_score:
                p_score = self.rng.rand(dim)
                p_score = p_score / np.max(p_score)
            else:
                p_score = list(range(dim))
            scores[pn] = p_score
        score += scores[pn][idx]
    score /= len(params)
    score += 0 if self.noise_scale is None else self.rng.normal(
        loc=0, scale=self.noise_scale)
    return score
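# Self-contained sketch of the scoring scheme above (parameter names and
# dimensions are hypothetical; the real version looks values up in ParamSpace).
# Each parameter gets a lazily created per-choice score table; the total score
# is the per-parameter average plus optional Gaussian noise.
import numpy as np

rng = np.random.RandomState(0)
tables = {}  # per-parameter score table, filled lazily

def toy_score(params, dims, random_score=True, noise_scale=0.1):
    score = 0.0
    for name, idx in params.items():
        if name not in tables:
            if random_score:
                t = rng.rand(dims[name])
                t = t / np.max(t)            # normalize so the best choice scores 1
            else:
                t = list(range(dims[name]))  # monotone: higher index, higher score
            tables[name] = t
        score += tables[name][idx]
    score /= len(params)                     # average over parameters
    if noise_scale is not None:
        score += rng.normal(loc=0, scale=noise_scale)
    return score

print(toy_score({'op1': 2, 'op2': 0}, dims={'op1': 4, 'op2': 3}))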
def print_tensor_params(self, max_num=3):
    """Log current tensor parameter values."""
    logger = self.logger
    ap_cont = tuple(a.detach().softmax(dim=-1).cpu().numpy()
                    for a in ParamSpace().tensor_values())
    max_num = min(len(ap_cont) // 2, max_num)
    logger.info('TENSOR: {}\n{}'.format(
        len(ap_cont), '\n'.join([
            str(a) for a in (ap_cont[:max_num] + ('...', ) + ap_cont[-max_num:])
        ])))
def run_epoch(self, optim, epoch, tot_epochs):
    """Run Estimator routine for one epoch."""
    if epoch == tot_epochs:
        return 1
    config = self.config
    # train
    self.print_tensor_params()
    n_trn_batch = self.get_num_train_batch(epoch)
    n_val_batch = self.get_num_valid_batch(epoch)
    update_arch = False
    arch_epoch_start = config.arch_update_epoch_start
    arch_epoch_intv = config.arch_update_epoch_intv
    if epoch >= arch_epoch_start and (epoch - arch_epoch_start) % arch_epoch_intv == 0:
        update_arch = True
    arch_update_intv = config.arch_update_intv
    if arch_update_intv == -1:  # update proportionally
        arch_update_intv = max(n_trn_batch / n_val_batch, 1) if n_val_batch else 1
    elif arch_update_intv == 0:  # update last step
        arch_update_intv = n_trn_batch
    arch_update_batch = config.arch_update_batch
    arch_step = 0
    for step in range(n_trn_batch):
        # optim step
        if update_arch and (step + 1) // arch_update_intv > arch_step:
            for _ in range(arch_update_batch):
                optim.step(self)
            arch_step += 1
        # supernet step
        optim.next()
        self.trainer.train_step(estim=self,
                                model=self.model,
                                epoch=epoch,
                                tot_epochs=tot_epochs,
                                step=step,
                                tot_steps=n_trn_batch)
    # eval
    self.clear_buffer()
    self.stepped(dict(ParamSpace().named_param_values()))
    self.wait_done()
    for _, res, arch_desc in self.buffer():
        score = self.get_score(res)
        if self.best_score is None or (score is not None and score > self.best_score):
            self.best_score = score
            self.best_arch_desc = arch_desc
    # save
    if config.save_arch_desc:
        self.save_arch_desc(epoch, arch_desc=arch_desc)
    if config.save_freq != 0 and epoch % config.save_freq == 0:
        self.save_checkpoint(epoch)
    self.save_arch_desc(save_name='best', arch_desc=self.best_arch_desc)
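# Toy trace (not framework code) of the architecture-update schedule above:
# an update fires whenever (step + 1) // arch_update_intv exceeds the number
# of updates done so far, i.e. roughly once every arch_update_intv steps.
def arch_update_steps(n_trn_batch, arch_update_intv):
    arch_step, fired = 0, []
    for step in range(n_trn_batch):
        if (step + 1) // arch_update_intv > arch_step:
            fired.append(step)
            arch_step += 1
    return fired

print(arch_update_steps(10, 3))    # [2, 5, 8]
print(arch_update_steps(10, 2.5))  # [2, 4, 7, 9] (fractional interval from n_trn/n_val)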
def __init__(self, name=None, space=None, on_update=None):
    self.name = None
    self._parent = None
    self._children = OrderedDict()
    space = space or ParamSpace()
    space.register(self, name)
    self.event_name = 'update:{}'.format(self.name)
    if on_update is not None:
        event_on(self.event_name, on_update)
    # wrap set_value so that on_update fires after every assignment
    set_value_ori = self.set_value

    def set_value_hooked(*args, **kwargs):
        set_value_ori(*args, **kwargs)
        self.on_update()

    self.set_value = set_value_hooked
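# Minimal standalone version of the set_value hook pattern above (the Knob
# class is hypothetical, not part of the framework): wrap a bound method on a
# single instance so a callback runs after every call to the original setter.
class Knob:
    def __init__(self, on_update=None):
        self.value = None
        if on_update is not None:
            original = self.set_value

            def hooked(*args, **kwargs):
                original(*args, **kwargs)  # run the real setter first
                on_update(self)            # then notify the listener
            self.set_value = hooked        # instance attribute shadows the method

    def set_value(self, value):
        self.value = value

k = Knob(on_update=lambda p: print('updated to', p.value))
k.set_value(3)  # prints: updated to 3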
def run_epoch(self, optim, epoch, tot_epochs):
    """Run Estimator routine for one epoch."""
    if epoch == tot_epochs:
        return {'stop': True}
    config = self.config
    # train
    self.print_tensor_params()
    n_trn_batch = self.get_num_train_batch(epoch)
    n_val_batch = self.get_num_valid_batch(epoch)
    update_arch = False
    arch_epoch_start = config.arch_update_epoch_start
    arch_epoch_intv = config.arch_update_epoch_intv
    if epoch >= arch_epoch_start and (epoch - arch_epoch_start) % arch_epoch_intv == 0:
        update_arch = True
    arch_update_intv = config.arch_update_intv
    if arch_update_intv == -1:  # update proportionally
        arch_update_intv = max(n_trn_batch / n_val_batch, 1) if n_val_batch else 1
    elif arch_update_intv == 0:  # update last step
        arch_update_intv = n_trn_batch
    arch_update_batch = config.arch_update_batch
    arch_step = 0
    for step in range(n_trn_batch):
        # optim step
        if update_arch and (step + 1) // arch_update_intv > arch_step:
            for _ in range(arch_update_batch):
                optim.step(self)
            arch_step += 1
        # supernet step
        optim.next()
        self.trainer.train_step(estim=self,
                                model=self.model,
                                epoch=epoch,
                                tot_epochs=tot_epochs,
                                step=step,
                                tot_steps=n_trn_batch)
    # eval
    self.clear_buffer()
    self.stepped(dict(ParamSpace().named_param_values()))
    self.wait_done()
def __call__(self, model):
    """Export archdesc from elastic model config."""
    arch_desc = []
    max_stage_depth = self.max_stage_depth
    if self.fix_first:
        arch_desc.append(None)
    params = {k: p.value() for k, p in ParamSpace().named_params()}
    seq_values = [v for k, v in params.items() if k.startswith('seq')]
    n_sequential = len(seq_values)
    spa_values = [v for k, v in params.items() if k.startswith('spa')]
    if not len(spa_values):
        spa_values = [6] * sum([len(btn) for btn in model.bottlenecks if len(btn) > 1])
    for i, spa in enumerate(spa_values):
        cur_seq_idx = i // max_stage_depth
        seq = seq_values[cur_seq_idx] if cur_seq_idx < len(seq_values) else cur_seq_idx
        exp = spa if cur_seq_idx >= n_sequential or i % max_stage_depth < seq else -1
        desc = 'NIL' if exp == -1 else 'MB3E{}'.format(exp)
        arch_desc.append(desc)
    return arch_desc
def step(self, params):
    """Return evaluation results of a parameter set."""
    ParamSpace().update_params(params)
    arch_desc = self.get_arch_desc()
    config = self.config
    try:
        self.construct_subnet(arch_desc)
    except RuntimeError:
        self.logger.info('subnet construct failed:\n{}'.format(traceback.format_exc()))
        ret = {'error_no': -1}
        return ret
    tot_epochs = config.subnet_epochs
    if tot_epochs > 0:
        self.reset_trainer(epochs=tot_epochs)
    for epoch in itertools.count(0):
        if epoch == tot_epochs:
            break
        # train
        self.train_epoch(epoch=epoch, tot_epochs=tot_epochs)
    ret = self.compute_metrics()
    self.logger.info('Evaluate: {} -> {}'.format(arch_desc, ret))
    return ret
def __init__(self, *args, seed=1, save_best=True, **kwargs):
    super().__init__(*args, save_best=save_best, **kwargs)
    random.seed(seed)
    self.space_size = ParamSpace().categorical_size()
def reset_all():
    """Reset all framework states."""
    ParamSpace().reset()
    EventManager().reset()
def step(self, params):
    """Return evaluation results from remote Estimator."""
    ParamSpace().update_params(params)
    arch_desc = self.get_arch_desc()
    return self.predictor.predict(arch_desc)
def __init__(self, space=None):
    self.space = space or ParamSpace()