Example #1
 def _add_reg(self, model):
     """Sample a regularization strength for each targeted linear layer.

     Returns a list of (layer_index, strength, is_l2) tuples and also
     stores it on ``model.new_params["reg_layers"]``.
     """
     # offset accounts for any batchnorm/dropout layers sitting between
     # the linear layers of the mutated model
     offset = self._check_bn_drop(model)
     reg_layers = []
     for k in self.reg_params:
         # "all_l2"/"all_l1": apply one penalty type to every linear
         # layer, each with its own sampled strength
         if k in ["all_l2", "all_l1"]:
             l2_reg = False
             if k == "all_l2":
                 l2_reg = True
             num_lin_layers = int(((len(self.model) - 2) / 2) + 1)
             j = 0
             for i in range(num_lin_layers):
                 space = self.reg_params[k]
                 hyperp = sample_from(space)
                 reg_layers.append((j, hyperp, l2_reg))
                 j += 2 + offset
         # "<layer>_l2"/"<layer>_l1": penalty for a single, specific layer
         elif k.split('_', 1)[1] in ["l2", "l1"]:
             layer_num = int(k.split('_', 1)[0])
             layer_num += (layer_num // 2) * (offset)
             l2_reg = True
             if k.split('_', 1)[1] == "l1":
                 l2_reg = False
             space = self.reg_params[k]
             hyperp = sample_from(space)
             reg_layers.append((layer_num, hyperp, l2_reg))
         else:
             pass
     model.new_params["reg_layers"] = reg_layers
     return reg_layers
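The (layer_index, strength, is_l2) tuples collected above are only recorded; the listing does not show how they are consumed. A hedged sketch of how such penalties might be folded into a training loss (the helper below is hypothetical, not from the listing):

import torch

def regularization_term(model, reg_layers):
    # Hypothetical helper: sum the sampled L1/L2 penalties over the
    # weights of the layers selected by _add_reg above.
    reg = torch.zeros(1)
    for idx, strength, is_l2 in reg_layers:
        w = model[idx].weight
        reg = reg + strength * (w.pow(2).sum() if is_l2 else w.abs().sum())
    return reg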
Example #2
    def get_sequence(self, length):
        """Given a model (a `Grid`), return a sequence of observations and the corresponding states."""
        H, W = self.shape

        states, observations = [], []
        for t in range(length):
            # choose a random init state
            if t == 0:
                state = np.random.randint(H), np.random.randint(W)
            else:
                state = sample_from(self.get_neighbours(state))
            o = sample_from(self.get_colors(state))
            states.append(state)
            observations.append(o)

        return np.array(observations), states
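For context, a call might look like the following; only `shape`, `get_neighbours`, `get_colors`, and `get_sequence` appear in the listing, so the Grid constructor shown here is an assumption:

# Illustrative usage only -- the Grid constructor is assumed.
grid = Grid(shape=(4, 4))
observations, states = grid.get_sequence(length=10)
print(observations)   # array of 10 sampled observations
print(states[:3])     # first three (row, col) states of the walk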
Example #3
 def _get_optimizer(self, model):
     """Sample an optimizer and learning rate for the given model."""
     # start from the run defaults; override if a search space is given
     lr = self.def_lr
     name = self.def_optim
     if "optim" in self.optim_params:
         space = self.optim_params['optim']
         name = sample_from(space)
     if "lr" in self.optim_params:
         space = self.optim_params['lr']
         lr = sample_from(space)
     if name == "sgd":
         opt = SGD
     elif name == "adam":
         opt = Adam
     model.new_params["optim"] = name
     model.new_params["lr"] = lr
     optim = opt(model.parameters(), lr=lr)
     return optim
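The optimizer returned above is then used in an ordinary PyTorch training step; this is standard torch.optim usage rather than anything specific to the listing (criterion, x, and y are placeholders):

# Standard PyTorch training step with the sampled optimizer.
optim.zero_grad()
loss = criterion(model(x), y)
loss.backward()
optim.step()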
Example #4
    def run_config(self, model, num_iters):
        """
        Train a particular hyperparameter configuration for a
        given number of iterations and evaluate the loss on the
        validation set.

        For hyperparameters that have previously been evaluated,
        resume from a previous checkpoint.

        Args
        ----
        - model: the mutated model to train.
        - num_iters: an int indicating the number of iterations
          to train the model for.

        Returns
        -------
        - val_loss: the lowest validation loss achieved.
        """
        try:
            ckpt = self._load_checkpoint(model.ckpt_name)
            model.load_state_dict(ckpt['state_dict'])
        except FileNotFoundError:
            pass

        model = model.to(self.device)

        # parse reg params
        reg_layers = self._add_reg(model)

        # setup train loader
        if self.data_loader is None:
            self.batch_hyper = True
            space = self.optim_params['batch_size']
            batch_size = sample_from(space)
            tqdm.write("batch size: {}".format(batch_size))
            self.data_loader = get_train_valid_loader(
                self.data_dir, self.args.name, batch_size,
                self.args.valid_size, self.args.shuffle, **self.kwargs)

        # training logic
        min_val_loss = 999999
        counter = 0
        num_epochs = int(num_iters) if self.epoch_scale else 1
        num_passes = None if self.epoch_scale else num_iters
        for epoch in range(num_epochs):
            self._train_one_epoch(model, num_passes, reg_layers)
            val_loss = self._validate_one_epoch(model)
            if val_loss < min_val_loss:
                min_val_loss = val_loss
                counter = 0
            else:
                counter += 1
            if counter > self.patience:
                tqdm.write("[!] early stopped!!")
                model.early_stopped = True
                return min_val_loss
        if self.batch_hyper:
            self.data_loader = None
        state = {
            'state_dict': model.state_dict(),
            'min_val_loss': min_val_loss,
        }
        self._save_checkpoint(state, model.ckpt_name)
        return min_val_loss
Example #5
    def get_random_config(self):
        """
        Build a mutated version of the user's model that
        incorporates the new hyperparameter settings defined
        by `hyperparams`.
        """
        self.all_batchnorm = False
        self.all_drop = False
        new_params = {}

        if not self.net_params:
            mutated = self.model
        else:
            layers = []
            used_acts = []
            all_act = False
            all_drop = False
            all_batchnorm = False
            num_layers = len(self.model)

            i = 0
            used_acts.append(self.model[1].__str__())
            for layer_hp in self.net_params.keys():
                layer, hp = layer_hp.split('_', 1)
                if layer.isdigit():
                    layer_num = int(layer)
                    diff = layer_num - i
                    if diff > 0:
                        for j in range(diff + 1):
                            layers.append(self.model[i + j])
                        i += diff
                        if hp == 'act':
                            space = find_key(self.net_params,
                                             '{}_act'.format(layer_num))
                            hyperp = sample_from(space)
                            new_params["act"] = hyperp
                            new_act = str2act(hyperp)
                            used_acts.append(new_act.__str__())
                            layers.append(new_act)
                            i += 1
                        elif hp == 'dropout':
                            layers.append(self.model[i])
                            space = find_key(self.net_params,
                                             '{}_drop'.format(layer_num))
                            hyperp = sample_from(space)
                            new_params["drop"] = hyperp
                            layers.append(nn.Dropout(p=hyperp))
                        else:
                            pass
                    elif diff == 0:
                        layers.append(self.model[i])
                        if hp == 'act':
                            space = find_key(self.net_params,
                                             '{}_act'.format(layer_num))
                            hyperp = sample_from(space)
                            new_params["act"] = hyperp
                            new_act = str2act(hyperp)
                            used_acts.append(new_act.__str__())
                            layers.append(new_act)
                            i += 1
                        elif hp == 'dropout':
                            i += 1
                            layers.append(self.model[i])
                            space = find_key(self.net_params,
                                             '{}_drop'.format(layer_num))
                            hyperp = sample_from(space)
                            new_params["drop"] = hyperp
                            layers.append(nn.Dropout(p=hyperp))
                        else:
                            pass
                    else:
                        if hp == 'act':
                            space = find_key(self.net_params,
                                             '{}_act'.format(layer_num))
                            hyperp = sample_from(space)
                            new_params["act"] = hyperp
                            new_act = str2act(hyperp)
                            used_acts.append(new_act.__str__())
                            layers[i] = new_act
                        elif hp == 'dropout':
                            space = find_key(self.net_params,
                                             '{}_drop'.format(layer_num))
                            hyperp = sample_from(space)
                            new_params["drop"] = hyperp
                            layers.append(nn.Dropout(p=hyperp))
                            layers.append(self.model[i])
                        else:
                            pass
                    i += 1
                else:
                    if (i < num_layers) and (len(layers) < num_layers):
                        for j in range(num_layers - i):
                            layers.append(self.model[i + j])
                        i += 1
                    if layer == "all":
                        if hp == "act":
                            space = self.net_params['all_act']
                            hyperp = sample_from(space)
                            all_act = False if hyperp == [0] else True
                        elif hp == "dropout":
                            space = self.net_params['all_dropout']
                            hyperp = sample_from(space)
                            all_drop = False if hyperp == [0] else True
                        elif hp == "batchnorm":
                            space = self.net_params['all_batchnorm']
                            hyperp = sample_from(space)
                            all_batchnorm = True if hyperp == 1 else False
                        else:
                            pass

            used_acts = sorted(set(used_acts), key=used_acts.index)

            if all_act:
                old_act = used_acts[0]
                space = self.net_params['all_act'][1][1]
                hyperp = sample_from(space)
                new_params["all_act"] = hyperp
                new_act = str2act(hyperp)
                used_acts.append(new_act.__str__())
                for i, l in enumerate(layers):
                    if l.__str__() == old_act:
                        layers[i] = new_act
            if all_batchnorm:
                self.all_batchnorm = True
                new_params["all_batch"] = True
                target_acts = used_acts if not all_act else used_acts[1:]
                for i, l in enumerate(layers):
                    if l.__str__() in target_acts:
                        # BatchNorm1d after fully-connected layers,
                        # BatchNorm2d after convolutional ones
                        if 'Linear' in layers[i - 1].__str__():
                            bn = nn.BatchNorm1d(layers[i - 1].out_features)
                        else:
                            bn = nn.BatchNorm2d(layers[i - 1].out_channels)
                        layers.insert(i + 1, bn)
                # the last hidden layer gets a batchnorm as well
                if 'Linear' in layers[-2].__str__():
                    bn = nn.BatchNorm1d(layers[-2].out_features)
                else:
                    bn = nn.BatchNorm2d(layers[-2].out_channels)
                layers.insert(-1, bn)
            if all_drop:
                self.all_drop = True
                new_params["all_drop"] = True
                target_acts = used_acts if not all_act else used_acts[1:]
                space = self.net_params['all_dropout'][1][1]
                hyperp = sample_from(space)
                for i, l in enumerate(layers):
                    if l.__str__() in target_acts:
                        layers.insert(i + 1 + all_batchnorm,
                                      nn.Dropout(p=hyperp))

            sizes = {}
            for k, v in self.size_params.items():
                layer_num = int(k.split("_", 1)[0])
                layer_num += (layer_num // 2) * (self.all_batchnorm +
                                                 self.all_drop)
                hyperp = sample_from(v)
                new_params["{}_hidden_size".format(layer_num)] = hyperp
                sizes[layer_num] = hyperp

            for layer, size in sizes.items():
                in_dim = layers[layer].in_features
                layers[layer] = nn.Linear(in_dim, size)
                if self.all_batchnorm:
                    layers[layer + 2] = nn.BatchNorm1d(size)
                next_layer = layer + (2 + self.all_batchnorm + self.all_drop)
                out_dim = layers[next_layer].out_features
                layers[next_layer] = nn.Linear(size, out_dim)

            mutated = nn.Sequential(*layers)

        self._init_weights_biases(mutated)
        mutated.ckpt_name = str(uuid.uuid4().hex)
        mutated.new_params = new_params
        mutated.early_stopped = False
        return mutated
Example #6
 def sample_observation(self, state: int) -> int:
     return sample_from([(o, self.B[state, o]) for o in range(self.M)])
Example #7
 def sample_transition(self, from_state: int) -> int:
     return sample_from([(s, self.A[from_state, s]) for s in range(self.N)])
Example #8
 def sample_initial(self) -> int:
     return sample_from([(s, self.pi[s]) for s in range(self.N)])
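The listing never shows `sample_from` itself. Examples #6-#8 call it with (value, probability) pairs built from the HMM matrices A and B and the vector pi, while examples #1-#5 pass a hyperparameter search space, so the two codebases likely define different helpers. A minimal sketch consistent with the weighted-pair calls only, offered as an illustration rather than the actual implementation:

import random

def sample_from(pairs):
    # Draw one value from a list of (value, probability) pairs.
    # Assumes the probabilities sum to 1, as rows of A, B and pi do.
    values, probs = zip(*pairs)
    return random.choices(values, weights=probs, k=1)[0]

# e.g. sampling an initial state from pi = [0.5, 0.3, 0.2]
pi = [0.5, 0.3, 0.2]
state = sample_from([(s, pi[s]) for s in range(len(pi))])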