def forward(self, features):
    """Run the detection head on a series of backbone feature maps.

    Returns a tuple `(confidences, locations)` produced by the
    classification heads and the regression heads respectively.
    """
    expect(isinstance(features, (list, tuple)),
           'features must be a series of feature.', ValueError)
    feature_maps = self.extras(features)
    # classification first, regression second -- same order as callers expect
    return (self.classification_headers(feature_maps),
            self.regression_headers(feature_maps))
def __init__(
        self, dataset, weights_manager, objective,
        rollout_type="mutation", template_cfg_file=None,
        save_every=10, bf_checkpoints=None, schedule_cfg=None):
    """Evaluator that tunes architectures from a template config file.

    Args:
        dataset / weights_manager: unused, this evaluator trains from configs.
        template_cfg_file: path to the YAML config template (required).
        save_every: save period for the tuned training.
        bf_checkpoints: epoch checkpoints; defaults to [10, 20, 40, 60, 80].
    """
    # do not need dataset, weights manager
    super(BFTuneEvaluator, self).__init__(
        dataset=None, weights_manager=None, objective=objective,
        rollout_type=rollout_type, schedule_cfg=schedule_cfg)
    expect(template_cfg_file is not None,
           "Must specified `template_cfg_file` configuration", ConfigException)
    self.template_cfg_file = template_cfg_file
    self.save_every = save_every
    # FIX: per-instance default instead of a shared mutable default argument
    self.bf_checkpoints = [10, 20, 40, 60, 80] if bf_checkpoints is None \
                          else bf_checkpoints
    with open(template_cfg_file, "r") as cfg_f:
        self.cfg_template = ConfigTemplate(yaml.safe_load(cfg_f))
    self.logger.info("Read the template config from %s", template_cfg_file)
    # assume gpu
    self.device = "0"
    self._perf_names = self.objective.perf_names()
    # regex that parses the performance values back out of the training logs
    self.log_pattern = re.compile(
        "valid performances: " + \
        "; ".join(
            ["{}: ([-0-9.]+)".format(n) for n in self._perf_names]))
def genotype(self, arch):
    """
    Get a human readable description of an discrete architecture.

    `arch` is a list with one entry per cell group; each entry is a pair
    `(nodes, ops)` of flat index arrays of length
    `num_steps * num_node_inputs`. Returns a namedtuple of type
    `self.genotype_type` mapping each cell group name to its connection list
    and each `<name>_concat` to the list of concatenated step nodes.
    """
    expect(len(arch) == self.num_cell_groups) # =1
    genotype_list = []
    concat_list = []
    for cg_arch in arch:
        genotype = []
        nodes, ops = cg_arch
        # nodes actually used as inputs by some later node
        used_end = set()
        for i_out in range(self.num_steps):
            for i_in in range(self.num_node_inputs):
                # flat index of the i_in-th input of step node i_out
                idx = i_out * self.num_node_inputs + i_in
                from_ = int(nodes[idx])
                used_end.add(from_)
                # entry: (op name, input node index, output node index);
                # output indices are offset past the init nodes
                genotype.append((self.shared_primitives[ops[idx]], from_,
                                 int(i_out + self.num_init_nodes)))
        genotype_list.append(genotype)
        if self.loose_end:
            # loose-end: concat only the step nodes no later node consumes
            concat = [
                i for i in range(1, self.num_steps + 1) if i not in used_end
            ]
        else:
            # otherwise concat every step node
            concat = list(range(1, self.num_steps + 1))
        concat_list.append(concat)
    kwargs = dict(
        itertools.chain(
            zip(self.cell_group_names, genotype_list),
            zip([n + "_concat" for n in self.cell_group_names],
                concat_list)))
    return self.genotype_type(**kwargs)
def infer_epoch(self, valid_queue, model, criterion, device):
    """Run one evaluation pass over `valid_queue`.

    Returns:
        (top-1 accuracy avg, loss avg, dict of objective performance avgs).
    """
    expect(self._is_setup, "trainer.setup should be called first")
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    objective_perfs = utils.OrderedStats()
    model.eval()
    # optionally disable autograd during inference to save memory
    context = torch.no_grad if self.eval_no_grad else nullcontext
    with context():
        for step, (inputs, target) in enumerate(valid_queue):
            inputs = inputs.to(device)
            target = target.to(device)
            logits = model(inputs)
            loss = criterion(logits, target)
            # extra objective performances besides accuracy/loss
            perfs = self._perf_func(inputs, logits, target, model)
            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = inputs.size(0)
            # all meters are weighted by batch size
            objective_perfs.update(dict(zip(self._perf_names, perfs)), n=n)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
            if step % self.report_every == 0:
                self.logger.info("valid %03d %e %f %f %s", step,
                                 objs.avg, top1.avg, top5.avg,
                                 "; ".join(["{}: {:.3f}".format(perf_n, v) \
                                 for perf_n, v in objective_perfs.avgs().items()]))
    return top1.avg, objs.avg, objective_perfs.avgs()
def __init__(self, search_space, device, rollout_type=None, mode="eval",
             init_population_size=100, perf_names=None,
             mutate_kwargs=None, eval_sample_strategy="all",
             schedule_cfg=None):
    """Pareto-front evolutionary controller.

    Args:
        init_population_size: size of the initial random population.
        perf_names: performance names forming the pareto objective;
            defaults to ["reward"].
        mutate_kwargs: kwargs forwarded to the rollout mutation.
        eval_sample_strategy: "all" or "n".
    """
    super(ParetoEvoController, self).__init__(search_space, rollout_type,
                                              mode, schedule_cfg)
    expect(
        eval_sample_strategy in {"all", "n"},
        "Invalid `eval_sample_strategy` {}, choices: {}".format(
            eval_sample_strategy, ["all", "n"]), ConfigException)
    self.init_population_size = init_population_size
    # FIX: per-instance defaults instead of shared mutable default arguments
    self.perf_names = ["reward"] if perf_names is None else perf_names
    self.mutate_kwargs = {} if mutate_kwargs is None else mutate_kwargs
    self.eval_sample_strategy = eval_sample_strategy
    # after initial random sampling, only pareto front points are saved in the population
    self.population = collections.OrderedDict()
    # whether or not sampling by mutation from pareto front has started
    self._start_pareto_sample = False
def train_epoch(self, train_queue, model, criterion, optimizer, device,
                epoch):
    """Train the detection model for one epoch.

    Returns:
        (top-1 accuracy avg, summed classification + localization loss avg).

    NOTE(review): inputs are moved to `self.device`, not the `device`
    parameter -- confirm whether the parameter is intentionally unused.
    """
    expect(self._is_setup, "trainer.setup should be called first")
    cls_objs = utils.AverageMeter()
    loc_objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.train()
    for step, (inputs, targets) in enumerate(train_queue):
        inputs = inputs.to(self.device)
        # targets = targets.to(self.device)
        optimizer.zero_grad()
        predictions = model.forward(inputs)
        # criterion returns separate classification / box-regression losses
        classification_loss, regression_loss = criterion(inputs, predictions,
                                                         targets, model)
        loss = classification_loss + regression_loss
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), self.grad_clip)
        optimizer.step()
        prec1, prec5 = self._acc_func(inputs, predictions, targets, model)
        n = inputs.size(0)
        cls_objs.update(classification_loss.item(), n)
        loc_objs.update(regression_loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        if step % self.report_every == 0:
            self.logger.info("train %03d %.3f %.3f; %.2f%%; %.2f%%", step,
                             cls_objs.avg, loc_objs.avg, top1.avg, top5.avg)
    return top1.avg, cls_objs.avg + loc_objs.avg
def _run_dnnc(self, name, prototxt, caffemodel, output_dir, dcf, mode,
              debug=False):
    """Compile a caffe model with the DNNC DPU compiler.

    Returns the path of the produced `dpu_<name>.elf` file.

    NOTE(review): the command line is built by string formatting and run
    with `shell=True`; the arguments are assumed to be trusted local config
    values -- do not feed untrusted strings into this method.
    """
    self.logger.info("-------- Run dnnc --------")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    expect(dcf is not None, "must specificy dcf file", ConfigException)
    subprocess.check_call(
        ("dnnc --mode {mode} --cpu_arch arm64 --save_kernel --prototxt {prototxt}"
         " --caffemodel {caffemodel} --output_dir {output_dir} --dcf {dcf} "
         "--net_name {name}{debug_cmd}").format(
             name=name,
             prototxt=prototxt,
             caffemodel=caffemodel,
             output_dir=output_dir,
             dcf=dcf,
             mode=mode,
             # dump all intermediate results when debugging
             debug_cmd=" --dump=all" if debug else "",
         ),
        shell=True,
    )
    output_elf = os.path.join(output_dir, "dpu_{}.elf".format(name))
    self.logger.info(
        "Finish running dnnc for {} (mode: {}), elf file: {}.".format(
            name, mode, output_elf))
    return output_elf
def __init__(self, dataset, weights_manager, objective,
             rollout_type="mutation", schedule_cfg=None):
    """Tune-based evaluator; the dataset is not used directly."""
    super(TuneEvaluator, self).__init__(
        dataset=None, weights_manager=weights_manager, objective=objective,
        rollout_type=rollout_type, schedule_cfg=schedule_cfg)

    # check rollout type: evaluator and weights manager must agree
    expect(
        self.rollout_type == self.weights_manager.rollout_type,
        "the rollout type of evaluator/weights_manager must match, "
        "check the configuration. ({}/{})".format(
            self.rollout_type, self.weights_manager.rollout_type),
        ConfigException)

    # assume gpu
    self.device = str(self.weights_manager.device.index)
    self._perf_names = self.objective.perf_names()
    # regex that extracts the performance values from the training logs
    perf_patterns = ("{}: ([-0-9.]+)".format(name)
                     for name in self._perf_names)
    self.log_pattern = re.compile(
        "valid performances: " + "; ".join(perf_patterns))
def __init__(
        self,
        macro_search_space_type="macro-stagewise",
        macro_search_space_cfg=None,
        micro_search_space_type="micro-dense",
        micro_search_space_cfg=None,
        schedule_cfg=None,
):
    """Two-level (macro + micro) search space.

    The macro and micro sub search spaces must agree on the number of
    cell groups.
    """
    super(Layer2SearchSpace, self).__init__(schedule_cfg)
    # FIX: per-instance defaults instead of shared mutable default arguments
    macro_search_space_cfg = macro_search_space_cfg or {}
    micro_search_space_cfg = micro_search_space_cfg or {}
    self.macro_search_space = SearchSpace.get_class_(macro_search_space_type)(
        **macro_search_space_cfg
    )
    self.micro_search_space = SearchSpace.get_class_(micro_search_space_type)(
        **micro_search_space_cfg
    )
    expect(
        self.macro_search_space.num_cell_groups
        == self.micro_search_space.num_cell_groups,
        "Macro/Micro search space expect the same cell group configuration, "
        "get {}/{} instead.".format(
            self.macro_search_space.num_cell_groups,
            self.micro_search_space.num_cell_groups,
        ),
        ConfigException,
    )
def setup(self, load=None, load_state_dict=None,
          save_every=None, train_dir=None, report_every=50):
    """Setup the trainer before training/evaluation.

    Args:
        load: checkpoint directory to restore full trainer state from.
        load_state_dict: model state-dict file; mutually exclusive with
            `load`.
        save_every: save period (epochs); requires `train_dir`.
        train_dir: directory for checkpoints.
        report_every: logging period (steps).
    """
    expect(
        not (load is not None and load_state_dict is not None),
        "`load` and `load_state_dict` cannot be passed simultaneously.")
    if load is not None:
        self.load(load)
    else:
        # a model must already be constructed if no full checkpoint is loaded
        assert self.model is not None
        if load_state_dict is not None:
            self._load_state_dict(load_state_dict)
    self.logger.info("param size = %f M",
                     utils.count_parameters(self.model) / 1.e6)
    self._parallelize()
    self.save_every = save_every
    self.train_dir = train_dir
    self.report_every = report_every
    expect(
        self.save_every is None or self.train_dir is not None,
        "when `save_every` is not None, make sure `train_dir` is not None")
    self._is_setup = True
def __init__(self, C, C_out, stride, primitives,
             partial_channel_proportion=None):
    """A shared op holding one candidate module per primitive.

    Args:
        C / C_out: input / output channel counts.
        primitives: list of primitive op names.
        partial_channel_proportion: if given, only 1/proportion of the
            channels go through the candidate ops (partial-channel trick);
            both C and C_out must be divisible by it.
    """
    super(SharedOp, self).__init__()
    self.primitives = primitives
    self.stride = stride
    self.partial_channel_proportion = partial_channel_proportion

    if self.partial_channel_proportion is not None:
        # FIX: the original error messages stated the divisibility relation
        # backwards; the checks require the channel counts to be divisible
        # by `partial_channel_proportion`.
        expect(
            C % self.partial_channel_proportion == 0,
            "#channels C must be divisible by partial_channel_proportion",
            ConfigException)
        expect(
            C_out % self.partial_channel_proportion == 0,
            "#channels C_out must be divisible by partial_channel_proportion",
            ConfigException)
        C = C // self.partial_channel_proportion
        C_out = C_out // self.partial_channel_proportion

    self.p_ops = nn.ModuleList()
    for primitive in self.primitives:
        op = ops.get_op(primitive)(C, C_out, stride, False)
        if "pool" in primitive:
            # follow pooling with a parameter-free BN (affine=False)
            op = nn.Sequential(op, nn.BatchNorm2d(C_out, affine=False))
        self.p_ops.append(op)
def __init__(self, search_space,
             # adversarial
             epsilon=0.03, n_step=5, step_size=0.0078, rand_init=False,
             # loss
             adv_loss_coeff=0., as_controller_regularization=False,
             as_evaluator_regularization=False,
             # reward
             adv_reward_coeff=0., schedule_cfg=None):
    """Objective mixing PGD adversarial robustness into loss and reward."""
    super(AdversarialRobustnessObjective, self).__init__(
        search_space, schedule_cfg)

    # adversarial generator: PGD attack used to craft adversarial examples
    self.adv_generator = PgdAdvGenerator(epsilon, n_step, step_size,
                                         rand_init)

    # coefficients / flags controlling where the adversarial loss is applied
    self.adv_reward_coeff = adv_reward_coeff
    self.adv_loss_coeff = adv_loss_coeff
    self.as_controller_regularization = as_controller_regularization
    self.as_evaluator_regularization = as_evaluator_regularization

    # adversarial-example cache statistics
    self.cache_hit = 0
    self.cache_miss = 0

    if self.adv_loss_coeff > 0:
        used_somewhere = (self.as_controller_regularization
                          or self.as_evaluator_regularization)
        expect(used_somewhere,
               "When `adv_loss_coeff` > 0, you should either use this adversarial loss"
               " as controller regularization or as evaluator regularization, or both. "
               "By setting `as_controller_regularization` and `as_evaluator_regularization`.",
               ConfigException)
def tokenize(self, path):
    """Tokenizes a text file.

    Each line is whitespace-split and terminated with an "<eos>" token;
    unseen words are added to `self.vocabulary` on the fly. Returns a list
    of per-line `torch.LongTensor`s of word indices.
    """
    expect(os.path.exists(path))
    sents = []
    # Single pass: the original read the file twice (once to build the
    # vocabulary, once to tokenize); adding each word right before looking
    # it up produces identical results in one pass.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line:
                continue
            words = line.split() + ["<eos>"]
            for word in words:
                self.vocabulary.add_word(word)
            sent = torch.LongTensor(len(words))
            for i, word in enumerate(words):
                sent[i] = self.vocabulary.word2idx[word]
            sents.append(sent)
    return sents
def rollout_from_genotype(self, genotype):
    """Convert genotype (semantic representation) to arch (controller representation)"""
    cell_strs = list(genotype._asdict().values())
    arch = []
    for cell_str in cell_strs:
        # genotype cell string format: "init~<k>+|op~node|...+|op~node|..."
        node_strs = cell_str.strip().split("+")
        # the first segment encodes the number of init nodes: "init~<k>"
        _geno_num_i_nodes = int(node_strs[0].split("~")[1])
        expect(
            _geno_num_i_nodes == self.num_init_nodes,
            ("Search space configuration (`num_init_nodes={}` "
             "differs from the genotype specification {})").format(
                 self.num_init_nodes, _geno_num_i_nodes),
        )
        # per step node: list of (op name, input node index str) pairs;
        # an empty segment (only "|") means the step has no connections
        all_conn_ops = [[
            conn_str.split("~") for conn_str in node_str[1:-1].split("|")
        ] if node_str.strip("|") else [] for node_str in node_strs[1:]]
        # transpose into three parallel index tuples for fancy indexing:
        # (output_node, input_node, op_id)
        all_conn_op_inds = tuple(
            zip(*[(
                i_node + self.num_init_nodes,
                int(conn_op[1]),
                self.primitives.index(conn_op[0]),
            ) for i_node, step_conn_ops in enumerate(all_conn_ops)
                  for conn_op in step_conn_ops])
        )  # [(output_node, input_node, op_id)]: the index tuples of `arch`
        # one-hot adjacency/op tensor for this cell
        cell_arch = np.zeros(
            (self._num_nodes, self._num_nodes, self.num_op_choices))
        cell_arch[all_conn_op_inds] = 1
        arch.append(cell_arch)
    return DenseMicroRollout(arch, search_space=self)
def _transform_kernel(self, origin_filter, kernel_size):
    """Derive a smaller kernel from `origin_filter`.

    If `do_kernel_transform` is off, simply crops the center sub-kernel;
    otherwise applies the learned linear transforms step by step through
    the ladder of kernel sizes until `kernel_size` is reached.
    """
    expect(
        kernel_size in self.kernel_sizes,
        "The kernel_size must be one of {}, got {} instead".format(
            self.kernel_sizes, kernel_size), ValueError)
    # already the requested size: nothing to do
    if origin_filter.shape[-1] == kernel_size:
        return origin_filter
    if not self.do_kernel_transform:
        # no learned transform: just crop the centered sub-kernel
        return get_sub_kernel(origin_filter, kernel_size)
    cur_filter = origin_filter
    expect(
        cur_filter.shape[-1] > kernel_size,
        "The kernel size must be less than origin kernel size {}, got {} instead."
        .format(origin_filter.shape[-1], kernel_size), ValueError)
    # walk size pairs from largest to smallest, e.g. (5,7) then (3,5)
    for smaller, larger in reversed(
            list(zip(self.kernel_sizes[:-1], self.kernel_sizes[1:]))):
        if cur_filter.shape[-1] < larger:
            # this transform stage starts above the current size: skip
            continue
        if kernel_size >= larger:
            # reached the requested size: stop transforming
            break
        # crop the center, flatten, apply the learned size-specific linear
        # transform, then restore the (out, in, k, k) layout
        sub_filter = get_sub_kernel(origin_filter,
                                    smaller).view(cur_filter.shape[0],
                                                  cur_filter.shape[1], -1)
        sub_filter = sub_filter.view(-1, sub_filter.shape[-1])
        sub_filter = getattr(self, "linear_{}to{}".format(larger,
                                                          smaller))(sub_filter)
        sub_filter = sub_filter.view(origin_filter.shape[0],
                                     origin_filter.shape[1], smaller**2)
        sub_filter = sub_filter.view(origin_filter.shape[0],
                                     origin_filter.shape[1], smaller, smaller)
        cur_filter = sub_filter
    return cur_filter
def train_epoch(self, train_queue, model, criterion, optimizer, device,
                epoch):
    """Train the model for one epoch with a multi-component loss.

    `criterion` returns a dict of named loss components; their sum is
    optimized. Returns (top-1 accuracy avg, sum of loss-component avgs).

    NOTE(review): `losses_obj.update(losses)` is called without the batch
    size `n` (unlike the meters below) -- confirm OrderedStats handles the
    implicit weight as intended.
    """
    expect(self._is_setup, "trainer.setup should be called first")
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    losses_obj = utils.OrderedStats()
    model.train()
    for step, (inputs, targets) in enumerate(train_queue):
        inputs = inputs.to(self.device)
        optimizer.zero_grad()
        predictions = model.forward(inputs)
        losses = criterion(inputs, predictions, targets, model)
        # total loss is the sum of all named components
        loss = sum(losses.values())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), self.grad_clip)
        optimizer.step()
        prec1, prec5 = self._acc_func(inputs, predictions, targets, model)
        n = inputs.size(0)
        losses_obj.update(losses)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        if step % self.report_every == 0:
            self.logger.info("train %03d %.2f%%; %.2f%%; %s", step,
                             top1.avg, top5.avg,
                             "; ".join(
                                 ["{}: {:.3f}".format(perf_n, v) \
                                 for perf_n, v in losses_obj.avgs().items()]))
    return top1.avg, sum(losses_obj.avgs().values())
def __init__(self, search_space, device, rollout_type=None, mode="eval",
             population_size=100, parent_pool_size=10, mutate_kwargs=None,
             eval_sample_strategy="population",
             elimination_strategy="regularized", schedule_cfg=None):
    """Evolutionary controller with a fixed-size population.

    Args:
        population_size: maximum population size.
        parent_pool_size: tournament size for parent selection.
        mutate_kwargs: kwargs forwarded to the rollout mutation.
        eval_sample_strategy: "population" or "all".
        elimination_strategy: "regularized" (age-based) or "perf".
    """
    super(EvoController, self).__init__(search_space, rollout_type, mode,
                                        schedule_cfg)
    expect(
        eval_sample_strategy in {"population", "all"},
        "Invalid `eval_sample_strategy` {}, choices: {}".format(
            eval_sample_strategy, ["population", "all"]), ConfigException)
    expect(
        elimination_strategy in {"regularized", "perf"},
        "Invalid `elimination_strategy` {}, choices: {}".format(
            elimination_strategy, ["regularized", "perf"]), ConfigException)
    self.population_size = population_size
    self.parent_pool_size = parent_pool_size
    # FIX: per-instance default instead of a shared mutable default argument
    self.mutate_kwargs = {} if mutate_kwargs is None else mutate_kwargs
    self.eval_sample_strategy = eval_sample_strategy
    self.elimination_strategy = elimination_strategy
    self.population = collections.OrderedDict()

    # keep track of all seen rollouts and scores
    self._gt_rollouts = []
    self._gt_scores = []
def __new__(cls, device, num_classes, feature_channels,
            expansions=None, channels=None, aspect_ratios=None,
            pretrained_path=None, schedule_cfg=None):
    """
    feature_channels: the channels of 2 feature_maps (C4, C5) extracted from backbone
    channels: extras feature_maps(C6, C7, C8, C9) channels
    """
    # FIX: per-call defaults instead of shared mutable default arguments
    expansions = [0.5, 0.5, 0.5, 0.5] if expansions is None else expansions
    channels = [512, 256, 256, 64] if channels is None else channels
    aspect_ratios = ([[2], [2, 3], [2, 3], [2, 3], [2], [2]]
                     if aspect_ratios is None else aspect_ratios)

    head_channels = feature_channels + channels
    extras = Extras(expansions, head_channels[1:])
    # each feature map gets len(ratios)*2 + 2 default boxes per location
    multi_ratio = [len(r) * 2 + 2 for r in aspect_ratios]
    regression_headers = Classifier(4, head_channels, multi_ratio)
    # +1 for the background class
    classification_headers = Classifier(num_classes + 1, head_channels,
                                        multi_ratio)
    expect(
        None not in [extras, regression_headers, classification_headers],
        "Extras, regression_headers and classification_headers must be provided, "
        "got None instead.", ConfigException)
    head = HeadModel(device,
                     num_classes=num_classes + 1,
                     extras=extras,
                     regression_headers=regression_headers,
                     classification_headers=classification_headers)
    if pretrained_path:
        # non-strict load: log the mismatched keys instead of failing
        mismatch = head.load_state_dict(torch.load(pretrained_path, "cpu"),
                                        strict=False)
        logger.info(mismatch)
    return head
def __init__(self, search_space,
             arch_embedder_type="lstm", arch_embedder_cfg=None,
             mlp_hiddens=(200, 200, 200), mlp_dropout=0.1,
             optimizer=None, scheduler=None,
             compare_loss_type="margin_linear", compare_margin=0.01,
             pairing_method="concat", diff_only=False,
             train_use_sigmoid=False,
             sorting_residue_worse_thresh=100,
             sorting_residue_better_thresh=100,
             max_grad_norm=None, schedule_cfg=None):
    """Pairwise architecture performance comparator.

    Args:
        optimizer: optimizer cfg dict; defaults to Adam with lr=0.001.
        compare_loss_type: "binary_cross_entropy" or "margin_linear".
        pairing_method: "concat" (embeddings concatenated) or "diff".
        diff_only: with "diff" pairing, feed the embedding difference only.
    """
    # [optional] arch reconstruction loss (arch_decoder_type/cfg)
    super(PairwiseComparator, self).__init__(schedule_cfg)
    nn.Module.__init__(self)

    # FIX: per-instance default instead of a shared mutable default argument
    if optimizer is None:
        optimizer = {"type": "Adam", "lr": 0.001}

    # configs
    expect(compare_loss_type in {"binary_cross_entropy", "margin_linear"},
           "comparing loss type {} not supported".format(compare_loss_type),
           ConfigException)
    self.compare_loss_type = compare_loss_type
    self.compare_margin = compare_margin
    expect(pairing_method in {"concat", "diff"},
           "pairing method {} not supported".format(pairing_method),
           ConfigException)
    self.pairing_method = pairing_method
    self.sorting_residue_worse_thresh = sorting_residue_worse_thresh
    self.sorting_residue_better_thresh = sorting_residue_better_thresh
    self.max_grad_norm = max_grad_norm
    self.train_use_sigmoid = train_use_sigmoid
    self.diff_only = diff_only

    self.search_space = search_space
    ae_cls = ArchEmbedder.get_class_(arch_embedder_type)
    self.arch_embedder = ae_cls(self.search_space,
                                **(arch_embedder_cfg or {}))
    # "diff"+diff_only pairing keeps a single embedding; otherwise two
    # embeddings are concatenated
    dim = self.embedding_dim = self.arch_embedder.out_dim \
        if (diff_only and pairing_method == "diff") \
        else 2 * self.arch_embedder.out_dim
    # construct MLP from embedding to score
    self.mlp = []
    for hidden_size in mlp_hiddens:
        self.mlp.append(nn.Sequential(
            nn.Linear(dim, hidden_size),
            nn.ReLU(inplace=False),
            nn.Dropout(p=mlp_dropout)))
        dim = hidden_size
    self.mlp.append(nn.Linear(dim, 1))
    self.mlp = nn.Sequential(*self.mlp)
    # init optimizer and scheduler
    self.optimizer = utils.init_optimizer(self.parameters(), optimizer)
    self.scheduler = utils.init_scheduler(self.optimizer, scheduler)
def __init__(self, search_space, device, rollout_type, schedule_cfg=None):
    """Base weights manager: validates and records the rollout type."""
    super(BaseWeightsManager, self).__init__(schedule_cfg)

    self.search_space = search_space
    self.device = device
    supported = self.all_supported_rollout_types()
    expect(rollout_type in supported,
           "Unsupported `rollout_type`: {}".format(rollout_type),
           ConfigException)  # supported rollout types
    self.rollout_type = rollout_type
def __init__(self, search_space, rollout_type, mode="eval",
             schedule_cfg=None):
    """Base controller: validates the rollout type and records the mode."""
    super(BaseController, self).__init__(schedule_cfg)

    self.search_space = search_space
    supported = self.all_supported_rollout_types()
    expect(rollout_type in supported,
           "Unsupported `rollout_type`: {}".format(rollout_type),
           ConfigException)  # supported rollout types
    self.rollout_type = rollout_type
    self.mode = mode
def _assert_keys(dct, mandatory_keys, possible_keys, name):
    """Check a schedule-cfg dict has all mandatory keys and no extras."""
    if mandatory_keys:
        has_all = set(mandatory_keys).issubset(dct.keys())
        expect(has_all,
               "{} schedule cfg must have keys: ({})".format(
                   name, ", ".join(mandatory_keys)))
    if possible_keys:
        extra_keys = set(dct.keys()).difference(possible_keys)
        expect(not extra_keys,
               "{} schedule cfg cannot have keys: ({}); all possible keys: ({})"\
               .format(name, ", ".join(extra_keys),
                       ", ".join(possible_keys)))
def __init__(
        self,
        search_space,
        device,
        num_tokens,
        rollout_type="differentiable",
        num_emb=300,
        num_hid=300,
        tie_weight=True,
        decoder_bias=True,
        share_primitive_weights=False,
        share_from_weights=False,
        batchnorm_step=False,
        batchnorm_edge=False,
        batchnorm_out=True,
        # training
        max_grad_norm=5.0,
        # dropout probs
        dropout_emb=0.,
        dropout_inp0=0.,
        dropout_inp=0.,
        dropout_hid=0.,
        dropout_out=0.,
        candidate_virtual_parameter_only=False):
    """Differentiable RNN super network.

    Mostly forwards its configuration to the shared-weights RNN base
    class; chooses the cell class based on `share_from_weights`
    (DARTS-style input-weight sharing).
    """
    # loose-ended cells have no fixed output node, which differentiable
    # relaxation cannot represent
    expect(
        not search_space.loose_end,
        "Differentiable NAS searching do not support loose-ended search_space",
        ConfigException)
    if share_from_weights:
        # darts
        cell_cls = RNNDiffSharedFromCell
    else:
        cell_cls = RNNDiffSharedCell
    super(RNNDiffSuperNet, self).__init__(
        search_space, device, rollout_type,
        cell_cls=cell_cls, op_cls=RNNDiffSharedOp,
        num_tokens=num_tokens, num_emb=num_emb, num_hid=num_hid,
        tie_weight=tie_weight, decoder_bias=decoder_bias,
        share_primitive_weights=share_primitive_weights,
        share_from_weights=share_from_weights,
        batchnorm_step=batchnorm_step,
        batchnorm_edge=batchnorm_edge,
        batchnorm_out=batchnorm_out,
        max_grad_norm=max_grad_norm,
        dropout_emb=dropout_emb, dropout_inp0=dropout_inp0,
        dropout_inp=dropout_inp, dropout_hid=dropout_hid,
        dropout_out=dropout_out)

    self.candidate_virtual_parameter_only = candidate_virtual_parameter_only
def plot_cell(self, genotype_concat, filename, cell_index,
              label="", edge_labels=None, plot_format="pdf"):
    """Plot a cell to `filename` on disk.

    Args:
        genotype_concat: `(genotype, concat)` pair -- the connection list
            and the concatenated node indices of one cell.
        filename: output path without extension.
        cell_index: which cell group (determines the number of steps).
        edge_labels: optional per-edge label list matching `genotype`.

    Returns the rendered file path ("<filename>.<plot_format>").
    """
    genotype, concat = genotype_concat
    from graphviz import Digraph
    if edge_labels is not None:
        expect(len(edge_labels) == len(genotype))
    graph = Digraph(
        format=plot_format,
        # https://stackoverflow.com/questions/4714262/graphviz-dot-captions
        body=["label=\"{l}\"".format(l=label),
              "labelloc=top", "labeljust=left"],
        edge_attr=dict(fontsize="20", fontname="times"),
        node_attr=dict(style="filled", shape="rect",
                       align="center", fontsize="20",
                       height="0.5", width="0.5",
                       penwidth="2", fontname="times"),
        engine="dot")
    graph.body.extend(["rankdir=LR"])
    # init nodes: outputs of the previous cells, e.g. c_{k-2}, c_{k-1}
    node_names = ["c_{k-" + str(self.num_init_nodes - i_in) + "}"\
                  for i_in in range(self.num_init_nodes)]
    [graph.node(node_name, fillcolor="darkseagreen2")
     for node_name in node_names]
    num_steps = self.get_num_steps(cell_index)
    for i in range(num_steps):
        graph.node(str(i), fillcolor="lightblue")
    node_names += [str(i) for i in range(num_steps)]
    for i, (op_type, from_, to_) in enumerate(genotype):
        # "none" edges carry no computation and are not drawn
        if op_type == "none":
            continue
        edge_label = op_type
        if edge_labels is not None:
            edge_label = edge_label + "; " + str(edge_labels[i])
        graph.edge(node_names[from_], node_names[to_],
                   label=edge_label, fillcolor="gray")
    # output node: concatenation of the selected step nodes
    graph.node("c_{k}", fillcolor="palegoldenrod")
    for node in concat:
        if node < self.num_init_nodes:
            from_ = "c_{k-" + str(self.num_init_nodes - node) + "}"
        else:
            from_ = str(node - self.num_init_nodes)
        graph.edge(from_, "c_{k}", fillcolor="gray")
    graph.render(filename, view=False)
    return filename + ".{}".format(plot_format)
def __init__(self, search_space,
             arch_embedder_type="lstm", arch_embedder_cfg=None,
             mlp_hiddens=(200, 200, 200), mlp_dropout=0.1,
             optimizer=None, scheduler=None,
             compare_loss_type="margin_linear", compare_margin=0.01,
             margin_l2=False, use_incorrect_list_only=False,
             tanh_score=None, max_grad_norm=None, schedule_cfg=None):
    """Pointwise architecture performance predictor/comparator.

    Args:
        optimizer: optimizer cfg dict; defaults to Adam with lr=0.001.
        compare_loss_type: "binary_cross_entropy" or "margin_linear".
        use_incorrect_list_only: only used by the listwise argsort update.
    """
    # [optional] arch reconstruction loss (arch_decoder_type/cfg)
    super(PointwiseComparator, self).__init__(schedule_cfg)
    nn.Module.__init__(self)

    # FIX: per-instance default instead of a shared mutable default argument
    if optimizer is None:
        optimizer = {"type": "Adam", "lr": 0.001}

    # configs
    expect(
        compare_loss_type in {"binary_cross_entropy", "margin_linear"},
        "comparing loss type {} not supported".format(compare_loss_type),
        ConfigException)
    self.compare_loss_type = compare_loss_type
    self.compare_margin = compare_margin
    self.margin_l2 = margin_l2
    self.max_grad_norm = max_grad_norm
    # for update_argsort listwise only
    self.use_incorrect_list_only = use_incorrect_list_only
    self.tanh_score = tanh_score

    self.search_space = search_space
    ae_cls = ArchEmbedder.get_class_(arch_embedder_type)
    self.arch_embedder = ae_cls(self.search_space,
                                **(arch_embedder_cfg or {}))
    dim = self.embedding_dim = self.arch_embedder.out_dim
    # construct MLP from embedding to score
    self.mlp = []
    for hidden_size in mlp_hiddens:
        self.mlp.append(
            nn.Sequential(nn.Linear(dim, hidden_size),
                          nn.ReLU(inplace=False),
                          nn.Dropout(p=mlp_dropout)))
        dim = hidden_size
    self.mlp.append(nn.Linear(dim, 1))
    self.mlp = nn.Sequential(*self.mlp)
    # init optimizer and scheduler
    self.optimizer = utils.init_optimizer(self.parameters(), optimizer)
    self.scheduler = utils.init_scheduler(self.optimizer, scheduler)
def test(cfg_file, load, load_state_dict, split, gpus, seed): #pylint: disable=redefined-builtin
    """CLI entry: evaluate a final model on the given dataset splits.

    Exactly one of `load` (full trainer checkpoint dir) or
    `load_state_dict` (model state-dict file) must be given.
    """
    assert (load is None) + (load_state_dict is None) == 1, \
        "One and only one of `--load` and `--load-state-dict` arguments is required."

    setproctitle.setproctitle("awnas-test config: {}; load: {}; cwd: {}"\
                              .format(cfg_file, load, os.getcwd()))

    # set gpu
    gpu_list = [int(g) for g in gpus.split(",")]
    if not gpu_list:
        _set_gpu(None)
        device = "cpu"
    else:
        _set_gpu(gpu_list[0])
        device = torch.device("cuda:{}".format(gpu_list[0]) if torch.cuda.
                              is_available() else "cpu")

    # set seed
    if seed is not None:
        LOGGER.info("Setting random seed: %d.", seed)
        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)

    # load components config
    LOGGER.info("Loading configuration files.")
    with open(cfg_file, "r") as f:
        cfg = yaml.safe_load(f)

    # initialize components
    LOGGER.info("Initializing components.")
    whole_dataset = _init_component(cfg, "dataset")
    search_space = _init_component(cfg, "search_space")
    objective = _init_component(cfg, "objective", search_space=search_space)
    # a fresh model is only constructed when loading a bare state dict;
    # with `load`, the trainer restores its own model from the checkpoint
    trainer = _init_component(
        cfg,
        "final_trainer",
        dataset=whole_dataset,
        model=_init_component(
            cfg, "final_model", search_space=search_space, device=device)
        if load_state_dict else None,
        device=device,
        gpus=gpu_list,
        objective=objective)
    # check trainer support for data type
    _data_type = whole_dataset.data_type()
    expect(_data_type in trainer.supported_data_types())

    # start training
    LOGGER.info("Start eval.")
    trainer.setup(load, load_state_dict)
    for split_name in split:
        trainer.evaluate_split(split_name)
def __init__(self, redis_addr=None):
    """Dispatcher that schedules evaluations on a ray cluster.

    Args:
        redis_addr: address of the ray head node's redis server (required).
    """
    super(RayDispatcher, self).__init__()

    expect(redis_addr is not None, "Redis address must be specified",
           ConfigException)
    self.redis_addr = redis_addr
    ray.init(redis_address=redis_addr)
    self.killer = Killer.remote() # create the killer actor

    # evaluation state, populated later during setup/dispatch
    self.evaluator = None
    self.evaluate_func = None
    self.ckpt_dir = None
    self.executing_ids = set()
def plot_arch(self, genotypes, filename, label="",
              edge_labels=None, plot_format="pdf"): #pylint: disable=arguments-differ
    """Plot an architecture to files on disk"""
    genotypes = list(genotypes._asdict().items())
    # this RNN search space has exactly one cell group: the genotype holds
    # one (name, connections) item and one (name, concat) item
    expect(len(genotypes) == 2 and self.num_cell_groups == 1,
           "Current RNN search space only support one cell group")
    expect(self.num_init_nodes == 1,
           "Current RNN search space only support one init node")
    # only one cell group now!
    geno_, concat_ = genotypes
    geno_, concat_ = geno_[1], concat_[1]
    edge_labels = edge_labels[0] if edge_labels is not None else None
    filename = filename + "-" + genotypes[0][0]
    from graphviz import Digraph
    graph = Digraph(
        format=plot_format,
        # https://stackoverflow.com/questions/4714262/graphviz-dot-captions
        body=["label=\"{l}\"".format(l=label),
              "labelloc=top", "labeljust=left"],
        edge_attr=dict(fontsize="20", fontname="times"),
        node_attr=dict(style="filled", shape="rect",
                       align="center", fontsize="20",
                       height="0.5", width="0.5",
                       penwidth="2", fontname="times"),
        engine="dot")
    graph.body.extend(["rankdir=LR"])
    # inputs: current token embedding and previous hidden state, both
    # feeding the first step node "0"
    graph.node("x_{t}", fillcolor="darkseagreen2")
    graph.node("h_{t-1}", fillcolor="darkseagreen2")
    graph.node("0", fillcolor="lightblue")
    graph.edge("x_{t}", "0", fillcolor="gray")
    graph.edge("h_{t-1}", "0", fillcolor="gray")
    _steps = self.num_steps
    # _steps = self.get_num_steps(cell_index)
    for i in range(1, 1 + _steps):
        graph.node(str(i), fillcolor="lightblue")
    for i, (op_type, from_, to_) in enumerate(geno_):
        edge_label = op_type
        if edge_labels is not None:
            edge_label = edge_label + "; " + str(edge_labels[i])
        graph.edge(str(from_), str(to_), label=edge_label, fillcolor="gray")
    # output hidden state: average/concat of the selected step nodes
    graph.node("h_{t}", fillcolor="palegoldenrod")
    for i in concat_:
        graph.edge(str(i), "h_{t}", fillcolor="gray")
    graph.render(filename, view=False)
    return [(genotypes[0][0], filename + ".{}".format(plot_format))]
def setup(self, load=None, save_every=None, save_controller_every=None,
          train_dir=None, writer=None, load_components=None,
          interleave_report_every=None):
    """
    Setup the scaffold: saving/loading/visualization settings.
    """
    if load is not None:
        all_components = ("controller", "evaluator", "trainer")
        # by default restore every component from the checkpoint dir
        load_components = all_components\
                          if load_components is None else load_components
        expect(
            set(load_components).issubset(all_components),
            "Invalid `load_components`")

        if "controller" in load_components:
            path = os.path.join(load, "controller")
            self.logger.info("Load controller from %s", path)
            try:
                self.controller.load(path)
            except Exception as e:
                # best-effort load: log and continue with a fresh component
                self.logger.error("Controller not loaded! %s", e)
        if "evaluator" in load_components:
            path = os.path.join(load, "evaluator")
            # if os.path.exists(path):
            self.logger.info("Load evaluator from %s", path)
            try:
                self.evaluator.load(path)
            except Exception as e:
                self.logger.error("Evaluator not loaded: %s", e)
        if "trainer" in load_components:
            path = os.path.join(load, "trainer")
            # if os.path.exists(path):
            self.logger.info("Load trainer from %s", path)
            try:
                self.load(path)
            except Exception as e:
                self.logger.error("Trainer not loaded: %s", e)

    self.save_every = save_every
    self.save_controller_every = save_controller_every
    self.train_dir = utils.makedir(
        train_dir) if train_dir is not None else train_dir
    if writer is not None:
        # give each component its own tensorboard sub-writer
        self.setup_writer(writer.get_sub_writer("trainer"))
        self.controller.setup_writer(writer.get_sub_writer("controller"))
        self.evaluator.setup_writer(writer.get_sub_writer("evaluator"))
    self.interleave_report_every = interleave_report_every
    self.is_setup = True
def __init__(self, search_space, device, cell_index,
             num_lstm_layers=1, controller_hid=64,
             softmax_temperature=None, tanh_constant=1.1,
             op_tanh_reduce=2.5, force_uniform=False, schedule_cfg=None):
    """LSTM-based controller network.

    Args:
        cell_index: if None, one network handles all cell groups; otherwise
            this network only handles the given cell group.
        softmax_temperature / tanh_constant / op_tanh_reduce: logit
            shaping parameters.
        force_uniform: if True, presumably sample uniformly regardless of
            the learned logits -- used elsewhere, not in this constructor.
    """
    super(BaseLSTM, self).__init__(search_space, device, schedule_cfg)
    self.cell_index = cell_index
    self.num_lstm_layers = num_lstm_layers
    self.controller_hid = controller_hid
    self.softmax_temperature = softmax_temperature
    self.tanh_constant = tanh_constant
    self.op_tanh_reduce = op_tanh_reduce
    self.force_uniform = force_uniform

    if self.cell_index is None:
        # parameters/inference for all cell group in one controller network
        if not self.search_space.cellwise_primitives:
            # the same set of primitives for different cg group
            self._primitives = self.search_space.shared_primitives
        else:
            # different set of primitives for different cg group:
            # build the union of all cellwise primitive sets, preserving
            # first-seen order, plus per-cell index lists into that union
            _primitives = collections.OrderedDict()
            for csp in self.search_space.cell_shared_primitives:
                for name in csp:
                    if name not in _primitives:
                        _primitives[name] = len(_primitives)
            self._primitives = list(_primitives.keys())
            self._cell_primitive_indexes = [[_primitives[name] for name in csp] \
                                            for csp in self.search_space.cell_shared_primitives]
        self._num_steps = self.search_space.num_steps
        expect(
            isinstance(self._num_steps, int),
            "Shared RL network do not support using different steps in "
            "different cell groups")
    else:
        # this controller network only handles one cell group
        self._primitives = self.search_space.cell_shared_primitives[
            self.cell_index]
        self._num_steps = self.search_space.get_num_steps(self.cell_index)
    self._num_primitives = len(self._primitives)

    # stacked LSTM cells of equal hidden size
    self.lstm = nn.ModuleList()
    for _ in range(self.num_lstm_layers):
        self.lstm.append(
            nn.LSTMCell(self.controller_hid, self.controller_hid))