def train_queue(self, queue, optimizer,
                criterion=lambda i, l, t: nn.CrossEntropyLoss()(l, t),
                eval_criterions=None, steps=1, **kwargs):
    """Train the candidate net on `steps` batches drawn from `queue`.

    Args:
        queue: iterator yielding (inputs, targets) batches.
        optimizer: optimizer stepped once per batch.
        criterion: callable (inputs, outputs, targets) -> loss tensor.
        eval_criterions (optional): list of callables whose flattened
            results are averaged over the trained batches.
        steps (int): number of batches to train on; must be positive.

    Returns:
        list: per-criterion results averaged over `steps` batches, or an
        empty list when no `eval_criterions` are given.
    """
    assert steps > 0
    self._set_mode("train")

    per_step_evals = []  # one flattened result list per trained batch
    for _ in range(steps):
        inputs, targets = next(queue)
        device = self.get_device()
        batch = (inputs.to(device), targets.to(device))
        outputs = self.forward_data(*batch, **kwargs)
        loss = criterion(batch[0], outputs, batch[1])
        if eval_criterions:
            per_step_evals.append(utils.flatten_list(
                [c(batch[0], outputs, batch[1]) for c in eval_criterions]))
        self.zero_grad()
        loss.backward()
        optimizer.step()
        self.clear_cache()

    if eval_criterions:
        # average each criterion's results across the trained batches
        return [sum(col) / steps for col in zip(*per_step_evals)]
    return []
def eval_queue(self, queue, criterions, steps=1, mode="eval",
               aggregate_fns=None, **kwargs):
    """Evaluate `steps` batches from `queue` with every criterion.

    For each batch, `forward_data` runs under `mode`, then the net is
    temporarily switched to "eval" while the criterions are computed and
    switched back afterwards.

    Args:
        queue: iterator yielding batches (moved to this net's device).
        criterions: list of callables (inputs, outputs, targets) -> result.
        steps (int): number of batches to evaluate.
        mode (str): net mode used for the forward pass.
        aggregate_fns (optional): one aggregation callable per flattened
            criterion output; defaults to the batch-wise mean.

    Returns:
        list: one aggregated value per flattened criterion output.
    """
    self._set_mode(mode)
    aggr_ans = []
    # skip gradient tracking unless the evaluator explicitly needs it
    context = torch.no_grad if self.eval_no_grad else nullcontext
    with context():
        for _ in range(steps):
            data = next(queue)
            # print("{}/{}\r".format(i, steps), end="")
            data = _to_device(data, self.get_device())
            outputs = self.forward_data(data[0], **kwargs)
            self._set_mode("eval")  # mAP only is calculated in "eval" mode
            ans = utils.flatten_list(
                [c(data[0], outputs, data[1]) for c in criterions])
            aggr_ans.append(ans)
            self._set_mode(mode)  # restore mode for the next forward pass
    # rows become per-criterion series across the evaluated batches
    aggr_ans = np.asarray(aggr_ans).transpose()
    if aggregate_fns is None:
        # by default, aggregate batch rewards with MEAN
        aggregate_fns = [lambda perfs: np.mean(perfs) if len(perfs) > 0 else 0.]\
            * len(aggr_ans)
    return [aggr_fn(ans) for aggr_fn, ans in zip(aggregate_fns, aggr_ans)]
def eval_queue(self, queue, criterions, steps=1, mode="eval",
               aggregate_fns=None, **kwargs):
    """Evaluate `steps` batches, calibrating BN running stats first.

    When `self.calib_bn_batch > 0`, that many batches are drawn up front
    and passed to `self.calib_bn`; those same batches are then reused as
    the first evaluation batches so no data is wasted.

    Args:
        queue: iterator yielding batches (moved to this net's device).
        criterions: list of callables (inputs, outputs, targets) -> result.
        steps (int): number of batches to evaluate.
        mode (str): net mode set before evaluation.
        aggregate_fns (optional): one aggregation callable per flattened
            criterion output; defaults to the batch-wise mean.

    Returns:
        list: one aggregated value per flattened criterion output.
    """
    # BN running statistics calibration
    # NOTE(review): if calib_bn_batch > steps, more batches are drawn than
    # will be evaluated — confirm this over-draw is intended.
    if self.calib_bn_batch > 0:
        calib_data = [next(queue) for _ in range(self.calib_bn_batch)]
        self.calib_bn(calib_data)
    self._set_mode(mode)
    aggr_ans = []
    context = torch.no_grad if self.eval_no_grad else nullcontext
    with context():
        for i in range(steps):
            # reuse the calibration batches before drawing fresh ones
            # (condition is never true when calib_bn_batch <= 0, so
            # `calib_data` is only referenced when it was created)
            if i < self.calib_bn_batch:
                data = calib_data[i]
            else:
                data = next(queue)
            data = _to_device(data, self.get_device())
            outputs = self.forward_data(data[0], **kwargs)
            ans = utils.flatten_list(
                [c(data[0], outputs, data[1]) for c in criterions])
            aggr_ans.append(ans)
    # rows become per-criterion series across the evaluated batches
    aggr_ans = np.asarray(aggr_ans).transpose()
    if aggregate_fns is None:
        # by default, aggregate batch rewards with MEAN
        aggregate_fns = [lambda perfs: np.mean(perfs) if len(perfs) > 0 else 0.]\
            * len(aggr_ans)
    return [aggr_fn(ans) for aggr_fn, ans in zip(aggregate_fns, aggr_ans)]
def eval_data(self, data, criterions, mode="eval", **kwargs):  # pylint: disable=arguments-differ
    """Override eval_data, to enable gradient (no `no_grad` context).

    Returns:
        list: flattened results returned by the criterions.
    """
    self._set_mode(mode)
    inputs, targets = data[0], data[1]
    outputs = self.forward_data(inputs, **kwargs)
    per_criterion = [crit(inputs, outputs, targets) for crit in criterions]
    return utils.flatten_list(per_criterion)
def eval_data(self, data, criterions, mode="eval", **kwargs):
    """Evaluate one batch with `criterions`.

    Runs under `torch.no_grad` when `self.eval_no_grad` is set, otherwise
    under a null context.

    Returns:
        list: flattened results returned by the criterions.
    """
    self._set_mode(mode)
    grad_ctx = torch.no_grad() if self.eval_no_grad else nullcontext()
    with grad_ctx:
        outputs = self.forward_data(data[0], **kwargs)
        return utils.flatten_list(
            [crit(data[0], outputs, data[1]) for crit in criterions])
def eval_data(self, data, criterions, mode="eval", **kwargs):  # pylint: disable=arguments-differ
    """Override eval_data, to enable gradient.

    Note: `kwargs` (e.g. `detach_arch: False`) is intentionally NOT
    forwarded — this `forward_data` has no `detach_arch` argument, so the
    keyword arguments are ignored here.

    Returns:
        list: flattened results returned by the criterions.
    """
    self._set_mode(mode)
    outputs = self.forward_data(data[0])
    results = [crit(data[0], outputs, data[1]) for crit in criterions]
    return utils.flatten_list(results)
def gradient(self, data,
             criterion=lambda i, l, t: nn.CrossEntropyLoss()(l, t),
             parameters=None, eval_criterions=None, mode="train",
             zero_grads=True, return_grads=True, **kwargs):
    """Get the gradient with respect to the candidate net parameters.

    Args:
        data: (inputs, targets) batch.
        criterion: callable (inputs, outputs, targets) -> loss tensor.
        parameters (optional): if specificied, can be a dict of
            param_name: param, or a list of parameter name.
        eval_criterions (optional): extra callables evaluated on the
            same forward outputs.
        mode (str): net mode set before the forward/backward pass.
        zero_grads (bool): zero existing grads before backward.
        return_grads (bool): when False, grads is returned as None.

    Returns:
        grads: list of (name, grad tensor) pairs — only parameters whose
            `.grad` is not None are included (NOTE: not a dict, despite
            the pairing); or None when `return_grads` is False. When
            `eval_criterions` is given, returns (grads, eval_results).
    """
    self._set_mode(mode)
    if return_grads:
        active_parameters = dict(self.named_parameters())
        if parameters is not None:
            _parameters = dict(parameters)
            # requested names must be a subset of the active parameters
            _addi = set(_parameters.keys()).difference(active_parameters)
            assert not _addi,\
                ("Cannot get gradient of parameters that are not active "
                 "in this candidate net: {}")\
                .format(", ".join(_addi))
        else:
            _parameters = active_parameters
    _, targets = data
    outputs = self.forward_data(*data, **kwargs)
    loss = criterion(data[0], outputs, targets)
    if zero_grads:
        self.zero_grad()
    loss.backward()
    if not return_grads:
        grads = None
    else:
        grads = [(k, v.grad.clone()) for k, v in six.iteritems(_parameters)\
                 if v.grad is not None]
    if eval_criterions:
        eval_res = utils.flatten_list(
            [c(data[0], outputs, targets) for c in eval_criterions])
        return grads, eval_res
    return grads
def eval_queue(self, queue, criterions, steps=1, mode="eval", **kwargs):
    """Evaluate `steps` batches from `queue`; return per-criterion means.

    Args:
        queue: iterator yielding (inputs, targets) batches.
        criterions: list of callables (inputs, outputs, targets) -> result.
        steps (int): number of batches to evaluate.
        mode (str): net mode set before evaluation.

    Returns:
        list: each criterion's flattened results averaged over `steps`.
    """
    self._set_mode(mode)
    running = None
    # skip gradient tracking unless the evaluator explicitly needs it
    ctx = torch.no_grad if self.eval_no_grad else nullcontext
    with ctx():
        for _ in range(steps):
            batch = next(queue)
            # print("{}/{}\r".format(i, steps), end="")
            device = self.get_device()
            batch = (batch[0].to(device), batch[1].to(device))
            outputs = self.forward_data(batch[0], **kwargs)
            ans = utils.flatten_list(
                [c(batch[0], outputs, batch[1]) for c in criterions])
            running = ans if running is None else [
                acc + cur for acc, cur in zip(running, ans)]
    return [total / steps for total in running]
def _init_criterions(self, rollout_type):
    """Set up the criterion callables used throughout evaluation.

    Builds the reward function (controller training), the eval loss
    (meta-parameter training) and the report/log criterions, and records
    the attribute names in `_criterions_related_attrs`.

    Args:
        rollout_type (str): must contain "differentiable".
    """
    # criterion and forward keyword arguments for evaluating rollout in `evaluate_rollout`
    # support compare rollout
    assert "differentiable" in rollout_type  # NOTE: only handle differentiable rollout differently
    # reward: keep controller regularization, drop evaluator regularization
    self._reward_func = partial(
        self.objective.get_loss,
        add_controller_regularization=True,
        add_evaluator_regularization=False,
    )
    self._reward_kwargs = {"detach_arch": False}
    self._scalar_reward_func = lambda *args, **kwargs: utils.get_numpy(
        self._reward_func(*args, **kwargs)
    )
    self._perf_names = self.objective.perf_names()
    self._all_perf_names = utils.flatten_list(["reward", "loss", self._perf_names])
    # criterion funcs for meta parameter training
    self._eval_loss_func = partial(
        self.objective.get_loss,
        add_controller_regularization=False,
        add_evaluator_regularization=True,
    )
    # criterion funcs for log/report
    self._report_loss_funcs = [
        partial(
            self.objective.get_loss_item,
            add_controller_regularization=False,
            add_evaluator_regularization=False,
        ),
        self.objective.get_perfs,
    ]
    # fix: "_reward_kwargs" was listed twice in this attribute list
    self._criterions_related_attrs = [
        "_reward_func",
        "_reward_kwargs",
        "_scalar_reward_func",
        "_perf_names",
        "_eval_loss_func",
        "_report_loss_funcs",
    ]
def train_queue(self, queue, optimizer,
                criterion=lambda i, l, t: nn.CrossEntropyLoss()(l, t),
                eval_criterions=None, steps=1, aggregate_fns=None, **kwargs):
    """Train on `steps` batches from `queue` and aggregate eval results.

    Args:
        queue: iterator yielding (inputs, targets) batches.
        optimizer: optimizer stepped once per batch.
        criterion: callable (inputs, outputs, targets) -> loss tensor.
        eval_criterions (optional): callables whose flattened results are
            collected per batch and aggregated at the end.
        steps (int): number of batches to train on; must be positive.
        aggregate_fns (optional): one aggregation callable per flattened
            criterion output; defaults to the batch-wise mean.

    Returns:
        list: one aggregated value per flattened criterion output, or an
        empty list when no `eval_criterions` are given.
    """
    assert steps > 0
    self._set_mode("train")

    per_step_evals = []
    for _ in range(steps):
        batch = _to_device(next(queue), self.get_device())
        outputs = self.forward_data(*batch, **kwargs)
        loss = criterion(batch[0], outputs, batch[1])
        if eval_criterions:
            per_step_evals.append(utils.flatten_list(
                [c(batch[0], outputs, batch[1]) for c in eval_criterions]))
        self.zero_grad()
        loss.backward()
        optimizer.step()
        self.clear_cache()

    if not eval_criterions:
        return []
    # rows become per-criterion series across the trained batches
    stacked = np.asarray(per_step_evals).transpose()
    if aggregate_fns is None:
        # by default, aggregate batch rewards with MEAN
        aggregate_fns = [
            lambda perfs: np.mean(perfs) if len(perfs) > 0 else 0.0
        ] * len(stacked)
    return [agg_fn(series) for agg_fn, series in zip(aggregate_fns, stacked)]
def _init_criterions(self, rollout_type):
    """Set up the criterion callables used throughout evaluation.

    Builds the reward function (controller training), the eval loss
    (meta-parameter training) and the report/log criterions, and records
    the attribute names in `_criterions_related_attrs`.

    Args:
        rollout_type (str): the rollout type; for "compare" rollouts the
            weights manager's own rollout type is used instead.
    """
    # criterion and forward keyword arguments for evaluating rollout in `evaluate_rollout`
    # support compare rollout
    if rollout_type == "compare":
        # init criterions according to weights manager's rollout type
        rollout_type = self.weights_manager.rollout_type
    self._reward_func = self.objective.get_reward
    self._reward_kwargs = {}
    self._scalar_reward_func = self._reward_func
    self._perf_names = self.objective.perf_names()
    self._all_perf_names = utils.flatten_list(["reward", "loss", self._perf_names])
    # criterion funcs for meta parameter training
    self._eval_loss_func = partial(
        self.objective.get_loss,
        add_controller_regularization=False,
        add_evaluator_regularization=True,
    )
    # criterion funcs for log/report
    self._report_loss_funcs = [
        partial(
            self.objective.get_loss_item,
            add_controller_regularization=False,
            add_evaluator_regularization=False,
        ),
        self.objective.get_perfs,
    ]
    # fix: "_reward_kwargs" was listed twice in this attribute list
    self._criterions_related_attrs = [
        "_reward_func",
        "_reward_kwargs",
        "_scalar_reward_func",
        "_perf_names",
        "_eval_loss_func",
        "_report_loss_funcs",
    ]
def eval_queue(self, queue, criterions, steps=1, mode="eval",
               aggregate_fns=None, **kwargs):
    """Evaluate `steps` batches, calibrating BN running stats first.

    Calibration draws batches until `calib_bn_num` samples are collected
    (checked first), or draws `calib_bn_batch` batches (checked second);
    the calibration batches are reused as the first evaluation batches.

    Args:
        queue: iterator yielding batches (moved to this net's device).
        criterions: list of callables (inputs, outputs, targets) -> result.
        steps (int): number of batches to evaluate.
        mode (str): accepted for interface compatibility; evaluation runs
            in "eval" mode after BN calibration.
        aggregate_fns (optional): one aggregation callable per flattened
            criterion output; defaults to the batch-wise mean.

    Returns:
        list: one aggregated value per flattened criterion output.
    """
    # BN running statistics calibration
    if self.calib_bn_num > 0:  # check `calib_bn_num` first
        calib_num = 0
        calib_data = []
        calib_batch = 0
        while calib_num < self.calib_bn_num:
            if calib_batch == steps:
                # FIX: format args were swapped (calib_num, steps) vs.
                # the "steps ... calib bn num" message
                utils.getLogger("robustness plugin.{}".format(self.__class__.__name__)).warn(
                    "steps (%d) reached, true calib bn num (%d)", steps, calib_num)
                break
            calib_data.append(next(queue))
            calib_num += len(calib_data[-1][1])  # count samples via targets
            calib_batch += 1
        self.calib_bn(calib_data)
    elif self.calib_bn_batch > 0:  # check `calib_bn_batch` then
        if self.calib_bn_batch > steps:
            # FIX: referenced nonexistent `self.calib_bn_steps`, which
            # raised AttributeError whenever this branch was taken
            utils.getLogger("robustness plugin.{}".format(self.__class__.__name__)).warn(
                "eval steps (%d) < `calib_bn_batch` (%d). Only use %d batches.",
                steps, self.calib_bn_batch, steps)
            calib_bn_batch = steps
        else:
            calib_bn_batch = self.calib_bn_batch
        calib_data = [next(queue) for _ in range(calib_bn_batch)]
        self.calib_bn(calib_data)
    else:
        calib_data = []

    self._set_mode("eval")  # use eval mode after BN calibration
    aggr_ans = []
    context = torch.no_grad if self.eval_no_grad else nullcontext
    with context():
        for i in range(steps):
            # reuse the calibration batches before drawing fresh ones
            if i < len(calib_data):
                data = calib_data[i]
            else:
                data = next(queue)
            data = _to_device(data, self.get_device())
            outputs = self.forward_data(data[0], **kwargs)
            ans = utils.flatten_list(
                [c(data[0], outputs, data[1]) for c in criterions])
            aggr_ans.append(ans)
            del outputs  # free activation memory before the next batch
            # FIX: progress label typo "eva step" -> "eval step"
            print("\reval step {}/{} ".format(i, steps), end="", flush=True)
    # rows become per-criterion series across the evaluated batches
    aggr_ans = np.asarray(aggr_ans).transpose()
    if aggregate_fns is None:
        # by default, aggregate batch rewards with MEAN
        aggregate_fns = [lambda perfs: np.mean(perfs) if len(perfs) > 0 else 0.]\
            * len(aggr_ans)
    return [aggr_fn(ans) for aggr_fn, ans in zip(aggregate_fns, aggr_ans)]
def gradient(self, data,
             criterion=lambda i, l, t: nn.CrossEntropyLoss()(l, t),
             parameters=None, eval_criterions=None, mode="train",
             zero_grads=True, return_grads=True, **kwargs):
    """Get the gradient with respect to the candidate net parameters.

    The batch is resized to the rollout's image size and processed in
    mini-batches whose size shrinks quadratically with the image-size
    blow-up; mini-batch gradients accumulate through repeated backward().

    Args:
        parameters (optional): if specificied, can be a dict of
            param_name: param, or a list of parameter name.

    Returns:
        grads: list of (name, grad tensor) pairs, or None when
            `return_grads` is False. When `eval_criterions` is given,
            returns (grads, eval_results) — NOTE: the eval results cover
            only the LAST mini-batch.
    """
    self._set_mode(mode)
    if return_grads:
        active_parameters = dict(self.named_parameters())
        if parameters is not None:
            _parameters = dict(parameters)
            _addi = set(_parameters.keys()).difference(active_parameters)
            assert not _addi,\
                ("Cannot get gradient of parameters that are not active "
                 "in this candidate net: {}")\
                .format(", ".join(_addi))
        else:
            _parameters = active_parameters

    inputs, targets = data
    batch_size = inputs.size(0)
    min_image_size = min(self.super_net.search_space.image_size_choice)
    cur_image_size = self.rollout.image_size
    # keep memory use roughly constant across image sizes
    ratio = (min_image_size / cur_image_size)**2
    mini_batch_size = make_divisible(batch_size * ratio, 8)
    inputs = F.interpolate(inputs, (cur_image_size, cur_image_size),
                           mode="bilinear", align_corners=False)

    if zero_grads:
        self.zero_grad()
    # FIX: the original loop was
    # `range(0, n_mini_batches, mini_batch_size)` — it stepped by
    # `mini_batch_size` over the mini-batch COUNT, so whenever
    # mini_batch_size > n_mini_batches only the first slice was processed
    # and most of the batch was silently skipped. Step through the batch
    # itself instead.
    for i in range(0, batch_size, mini_batch_size):
        mini_inputs = inputs[i:i + mini_batch_size]
        mini_targets = targets[i:i + mini_batch_size]
        outputs = self.forward_data(mini_inputs, mini_targets, **kwargs)
        loss = criterion(mini_inputs, outputs, mini_targets)
        loss.backward()  # accumulate grads across mini-batches

    if not return_grads:
        grads = None
    else:
        grads = [(k, v.grad.clone()) for k, v in six.iteritems(_parameters)
                 if v.grad is not None]
    if eval_criterions:
        eval_res = utils.flatten_list([
            c(mini_inputs, outputs, mini_targets) for c in eval_criterions
        ])
        return grads, eval_res
    return grads