def _add_reg(self, model):
    """Sample L1/L2 regularization strengths for the model's Linear layers."""
    offset = self._check_bn_drop(model)
    reg_layers = []
    for k in self.reg_params.keys():
        if k in ["all_l2", "all_l1"]:
            # a single space applies to every Linear layer
            l2_reg = (k == "all_l2")
            num_lin_layers = int(((len(self.model) - 2) / 2) + 1)
            j = 0
            for i in range(num_lin_layers):
                space = self.reg_params[k]
                hyperp = sample_from(space)
                reg_layers.append((j, hyperp, l2_reg))
                j += 2 + offset
        elif k.split('_', 1)[1] in ["l2", "l1"]:
            # layer-specific key of the form "<layer>_l1" or "<layer>_l2"
            layer_num = int(k.split('_', 1)[0])
            # shift the index to account for inserted batchnorm/dropout layers
            layer_num += (layer_num // 2) * offset
            l2_reg = (k.split('_', 1)[1] == "l2")
            space = self.reg_params[k]
            hyperp = sample_from(space)
            reg_layers.append((layer_num, hyperp, l2_reg))
        else:
            pass
    model.new_params["reg_layers"] = reg_layers
    return reg_layers
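# Example: a `reg_params` dict consistent with the key parsing in `_add_reg`
# above. The "all_l2"/"all_l1" and "<layer>_l1"/"<layer>_l2" key names follow
# directly from the code; the ('uniform', low, high) tuples are only an assumed
# placeholder, so use whatever space format your `sample_from` accepts.
example_reg_params = {
    "all_l2": ("uniform", 1e-6, 1e-3),  # one L2 strength sampled per Linear layer
    "0_l1": ("uniform", 1e-7, 1e-4),    # an L1 strength for the layer at index 0 only
}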
def get_sequence(self, length):
    """Sample a sequence of observations of the given length from this
    grid HMM, along with the corresponding hidden states."""
    H, W = self.shape
    states, observations = [], []
    for t in range(length):
        if t == 0:
            # choose a random initial state
            state = np.random.randint(H), np.random.randint(W)
        else:
            # transition to one of the current state's neighbours
            state = sample_from(self.get_neighbours(state))
        # emit an observation (a color) from the current state
        o = sample_from(self.get_colors(state))
        states.append(state)
        observations.append(o)
    return np.array(observations), states
def _get_optimizer(self, model):
    """Sample an optimizer and learning rate, falling back to the defaults."""
    lr = self.def_lr
    name = self.def_optim
    if "optim" in self.optim_params:
        space = self.optim_params['optim']
        name = sample_from(space)
    if "lr" in self.optim_params:
        space = self.optim_params['lr']
        lr = sample_from(space)
    if name == "sgd":
        opt = SGD
    elif name == "adam":
        opt = Adam
    else:
        raise ValueError("[!] unsupported optimizer: {}".format(name))
    model.new_params["optim"] = name
    model.new_params["lr"] = lr
    optim = opt(model.parameters(), lr=lr)
    return optim
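# Example: an `optim_params` dict consumed by `_get_optimizer` above (and by
# `run_config` below for "batch_size"). The key names come from the code; the
# value encodings shown here are assumptions about what `sample_from` expects.
example_optim_params = {
    "optim": ["sgd", "adam"],            # categorical choice of optimizer
    "lr": ("log_uniform", 1e-4, 1e-1),   # assumed log-uniform learning-rate space
    "batch_size": [32, 64, 128, 256],    # sampled in run_config when no loader is given
}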
def run_config(self, model, num_iters):
    """
    Train a particular hyperparameter configuration for a given
    number of iterations and evaluate the loss on the validation set.

    For hyperparameters that have previously been evaluated, resume
    from a previous checkpoint.

    Args
    ----
    - model: the mutated model to train.
    - num_iters: an int indicating the number of iterations
      to train the model for.

    Returns
    -------
    - val_loss: the lowest validation loss achieved.
    """
    # resume from a checkpoint if this configuration was trained before
    try:
        ckpt = self._load_checkpoint(model.ckpt_name)
        model.load_state_dict(ckpt['state_dict'])
    except FileNotFoundError:
        pass

    model = model.to(self.device)

    # parse reg params
    reg_layers = self._add_reg(model)

    # setup train loader, sampling a batch size if it is a hyperparameter
    if self.data_loader is None:
        self.batch_hyper = True
        space = self.optim_params['batch_size']
        batch_size = sample_from(space)
        tqdm.write("batch size: {}".format(batch_size))
        self.data_loader = get_train_valid_loader(
            self.data_dir, self.args.name, batch_size,
            self.args.valid_size, self.args.shuffle, **self.kwargs)

    # training logic with early stopping on the validation loss
    min_val_loss = float("inf")
    counter = 0
    num_epochs = int(num_iters) if self.epoch_scale else 1
    num_passes = None if self.epoch_scale else num_iters
    for epoch in range(num_epochs):
        self._train_one_epoch(model, num_passes, reg_layers)
        val_loss = self._validate_one_epoch(model)
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            counter = 0
        else:
            counter += 1
        if counter > self.patience:
            tqdm.write("[!] early stopped!!")
            model.early_stopped = True
            return min_val_loss

    if self.batch_hyper:
        self.data_loader = None

    state = {
        'state_dict': model.state_dict(),
        'min_val_loss': min_val_loss,
    }
    self._save_checkpoint(state, model.ckpt_name)
    return min_val_loss
def get_random_config(self):
    """
    Build a mutated version of the user's model that incorporates
    the newly sampled hyperparameter settings.
    """
    self.all_batchnorm = False
    self.all_drop = False

    new_params = {}
    if not self.net_params:
        mutated = self.model
    else:
        layers = []
        used_acts = []
        all_act = False
        all_drop = False
        all_batchnorm = False
        num_layers = len(self.model)

        i = 0
        used_acts.append(self.model[1].__str__())
        for layer_hp in self.net_params.keys():
            layer, hp = layer_hp.split('_', 1)
            if layer.isdigit():
                layer_num = int(layer)
                diff = layer_num - i
                if diff > 0:
                    # copy over the untouched layers up to layer_num
                    for j in range(diff + 1):
                        layers.append(self.model[i + j])
                    i += diff
                    if hp == 'act':
                        space = find_key(
                            self.net_params, '{}_act'.format(layer_num))
                        hyperp = sample_from(space)
                        new_params["act"] = hyperp
                        new_act = str2act(hyperp)
                        used_acts.append(new_act.__str__())
                        layers.append(new_act)
                        i += 1
                    elif hp == 'dropout':
                        layers.append(self.model[i])
                        space = find_key(
                            self.net_params, '{}_drop'.format(layer_num))
                        hyperp = sample_from(space)
                        new_params["drop"] = hyperp
                        layers.append(nn.Dropout(p=hyperp))
                    else:
                        pass
                elif diff == 0:
                    layers.append(self.model[i])
                    if hp == 'act':
                        space = find_key(
                            self.net_params, '{}_act'.format(layer_num))
                        hyperp = sample_from(space)
                        new_params["act"] = hyperp
                        new_act = str2act(hyperp)
                        used_acts.append(new_act.__str__())
                        layers.append(new_act)
                        i += 1
                    elif hp == 'dropout':
                        i += 1
                        layers.append(self.model[i])
                        space = find_key(
                            self.net_params, '{}_drop'.format(layer_num))
                        hyperp = sample_from(space)
                        new_params["drop"] = hyperp
                        layers.append(nn.Dropout(p=hyperp))
                    else:
                        pass
                else:
                    if hp == 'act':
                        space = find_key(
                            self.net_params, '{}_act'.format(layer_num))
                        hyperp = sample_from(space)
                        new_params["act"] = hyperp
                        new_act = str2act(hyperp)
                        used_acts.append(new_act.__str__())
                        layers[i] = new_act
                    elif hp == 'dropout':
                        space = find_key(
                            self.net_params, '{}_drop'.format(layer_num))
                        hyperp = sample_from(space)
                        new_params["drop"] = hyperp
                        layers.append(nn.Dropout(p=hyperp))
                        layers.append(self.model[i])
                    else:
                        pass
                i += 1
            else:
                # copy over any remaining untouched layers before
                # applying the "all_*" hyperparameters
                if (i < num_layers) and (len(layers) < num_layers):
                    for j in range(num_layers - i):
                        layers.append(self.model[i + j])
                    i += 1
                if layer == "all":
                    if hp == "act":
                        space = self.net_params['all_act']
                        hyperp = sample_from(space)
                        all_act = False if hyperp == [0] else True
                    elif hp == "dropout":
                        space = self.net_params['all_dropout']
                        hyperp = sample_from(space)
                        all_drop = False if hyperp == [0] else True
                    elif hp == "batchnorm":
                        space = self.net_params['all_batchnorm']
                        hyperp = sample_from(space)
                        all_batchnorm = True if hyperp == 1 else False
                    else:
                        pass

        # deduplicate activations while preserving order
        used_acts = sorted(set(used_acts), key=used_acts.index)

        if all_act:
            # replace every occurrence of the old activation
            old_act = used_acts[0]
            space = self.net_params['all_act'][1][1]
            hyperp = sample_from(space)
            new_params["all_act"] = hyperp
            new_act = str2act(hyperp)
            used_acts.append(new_act.__str__())
            for i, l in enumerate(layers):
                if l.__str__() == old_act:
                    layers[i] = new_act

        if all_batchnorm:
            # insert a batchnorm layer after every activation
            self.all_batchnorm = True
            new_params["all_batch"] = True
            target_acts = used_acts if not all_act else used_acts[1:]
            for i, l in enumerate(layers):
                if l.__str__() in target_acts:
                    if 'Linear' in layers[i - 1].__str__():
                        bn = nn.BatchNorm2d(layers[i - 1].out_features)
                    else:
                        bn = nn.BatchNorm2d(layers[i - 1].out_channels)
                    layers.insert(i + 1, bn)
            if 'Linear' in layers[-2].__str__():
                bn = nn.BatchNorm2d(layers[i - 1].out_features)
            else:
                bn = nn.BatchNorm2d(layers[i - 1].out_channels)
            layers.insert(-1, bn)

        if all_drop:
            # insert a dropout layer after every activation (and batchnorm)
            self.all_drop = True
            new_params["all_drop"] = True
            target_acts = used_acts if not all_act else used_acts[1:]
            space = self.net_params['all_dropout'][1][1]
            hyperp = sample_from(space)
            for i, l in enumerate(layers):
                if l.__str__() in target_acts:
                    layers.insert(i + 1 + all_batchnorm, nn.Dropout(p=hyperp))

        # sample new hidden sizes and rewire the affected Linear layers
        sizes = {}
        for k, v in self.size_params.items():
            layer_num = int(k.split("_", 1)[0])
            layer_num += (layer_num // 2) * (self.all_batchnorm + self.all_drop)
            hyperp = sample_from(v)
            new_params["{}_hidden_size".format(layer_num)] = hyperp
            sizes[layer_num] = hyperp

        for layer, size in sizes.items():
            in_dim = layers[layer].in_features
            layers[layer] = nn.Linear(in_dim, size)
            if self.all_batchnorm:
                layers[layer + 2] = nn.BatchNorm2d(size)
            next_layer = layer + (2 + self.all_batchnorm + self.all_drop)
            out_dim = layers[next_layer].out_features
            layers[next_layer] = nn.Linear(size, out_dim)

        mutated = nn.Sequential(*layers)

    self._init_weights_biases(mutated)
    mutated.ckpt_name = str(uuid.uuid4().hex)
    mutated.new_params = new_params
    mutated.early_stopped = False
    return mutated
def sample_observation(self, state: int) -> int:
    """Sample an observation from the emission distribution of `state`."""
    return sample_from([(o, self.B[state, o]) for o in range(self.M)])

def sample_transition(self, from_state: int) -> int:
    """Sample the next state from the transition distribution of `from_state`."""
    return sample_from([(s, self.A[from_state, s]) for s in range(self.N)])

def sample_initial(self) -> int:
    """Sample an initial state from the initial distribution `pi`."""
    return sample_from([(s, self.pi[s]) for s in range(self.N)])
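# The three samplers above assume a `sample_from` helper that draws a value
# from a list of (value, probability) pairs. A minimal sketch of such a helper
# is shown below; it is an assumption, not necessarily the project's actual
# implementation.
import numpy as np

def sample_from(pairs):
    """Draw one value from a list of (value, probability) pairs."""
    values = [v for v, _ in pairs]
    probs = np.asarray([p for _, p in pairs], dtype=float)
    probs /= probs.sum()  # guard against small rounding errors in the rows of A, B, pi
    idx = np.random.choice(len(values), p=probs)
    return values[idx]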