Example #1
File: mbhb.py  Project: Dee-Why/hp-tuner
    def choose_next(self, num_config):
        self.logger.info('LCNet: model-based choosing.')
        if len(self.incumbent_obj) <= 0:
            return sample_configurations(self.config_space, num_config)
        self.logger.info('start training LCNet, training data shape: %s' % str(self.lc_training_x.shape))
        self.lcnet_model.train(self.lc_training_x, self.lc_training_y)

        next_configs = []
        random_configs = sample_configurations(self.config_space, 50 * self.num_config)
        random_configs_data = convert_configurations_to_array(random_configs)
        x_test = None
        for i in range(random_configs_data.shape[0]):
            # Append t = 1.0 (the full budget) so LCNet predicts the final performance.
            x = np.concatenate((random_configs_data[i, None, :], np.array([[1.0]])), axis=1)
            if x_test is None:
                x_test = x
            else:
                x_test = np.concatenate((x_test, x), 0)
        m, v = self.lcnet_model.predict(x_test)
        # Rank candidates by predicted mean in descending order.
        sorted_configs = [random_configs[i] for i in np.argsort(-m)]
        print(sorted_configs[:5])
        number_flag = False
        for config in sorted_configs:
            if config not in next_configs:
                next_configs.append(config)
            if len(next_configs) == num_config:
                number_flag = True
                break
        if not number_flag:
            next_configs = expand_configurations(next_configs, self.config_space, num_config)
            self.logger.warning('MBHB: add random configuration here.' + '=' * 50)
        return next_configs
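All of the snippets in this listing rely on two helpers, sample_configurations and expand_configurations, whose definitions are not shown here. The following is a hypothetical minimal sketch of their presumed behavior built on the standard ConfigSpace API; the signatures match the call sites in these examples, but the bodies are assumptions rather than hp-tuner's actual implementation.

from ConfigSpace import ConfigurationSpace


def sample_configurations(config_space: ConfigurationSpace, num):
    # Draw `num` distinct configurations uniformly at random from the space.
    configs = []
    while len(configs) < num:
        config = config_space.sample_configuration()
        if config not in configs:
            configs.append(config)
    return configs


def expand_configurations(configs, config_space: ConfigurationSpace, num):
    # Pad an existing candidate list with random configurations until it
    # contains `num` entries.
    while len(configs) < num:
        config = config_space.sample_configuration()
        if config not in configs:
            configs.append(config)
    return configs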
Example #2
    def choose_next(self, num_config):
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            return sample_configurations(self.config_space, num_config)

        conf_cnt = 0
        next_configs = []
        total_cnt = 0

        incumbent = dict()
        max_r = self.iterate_r[-1]
        best_index = np.argmin(self.target_y[max_r])
        incumbent['config'] = self.target_x[max_r][best_index]
        approximate_obj = self.weighted_surrogate.predict(
            convert_configurations_to_array([incumbent['config']]))[0]
        incumbent['obj'] = approximate_obj
        self.weighted_acquisition_func.update(model=self.weighted_surrogate,
                                              eta=incumbent)

        while conf_cnt < num_config and total_cnt < 2 * num_config:
            rand_config = self.weighted_acq_optimizer.maximize(batch_size=1)[0]
            if rand_config not in next_configs:
                next_configs.append(rand_config)
                conf_cnt += 1
            total_cnt += 1
        if conf_cnt < num_config:
            next_configs = expand_configurations(next_configs,
                                                 self.config_space, num_config)
        return next_configs
Example #3
    def choose_next_weighted(self, num_config):
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            return sample_configurations(self.config_space, num_config)

        conf_cnt = 0
        next_configs = []
        total_cnt = 0

        while conf_cnt < num_config and total_cnt < 2 * num_config:
            # in Bayesian optimization, eliminate epsilon sampling.
            incumbent = dict()
            # TODO: problem --> use the best configuration under the maximal resource.
            # TODO: use SMAC's optimization algorithm.
            max_r = self.iterate_r[-1]
            best_index = np.argmin(self.target_y[max_r])
            incumbent['config'] = self.target_x[max_r][best_index]
            approximate_obj = self.weighted_surrogate.predict(
                convert_configurations_to_array([incumbent['config']]))[0]
            incumbent['obj'] = approximate_obj

            self.weighted_acquisition_func.update(
                model=self.weighted_surrogate, eta=incumbent)
            rand_config = self.weighted_acq_optimizer.maximize(batch_size=1)[0]

            if rand_config not in next_configs:
                next_configs.append(rand_config)
                conf_cnt += 1
            total_cnt += 1

        if conf_cnt < num_config:
            next_configs = expand_configurations(next_configs,
                                                 self.config_space, num_config)
        return next_configs
Example #4
File: pbt.py  Project: Dee-Why/hp-tuner
    def iterate_parallel(self):
        # sample n_population configurations.
        T = sample_configurations(self.config_space, self.n_population)
        self.logger.info('-' * 20 + str([item.get_dictionary() for item in T]))
        step_num = 0
        result = []
        extra_info = None
        while step_num < self.iter_steps:
            self.logger.info('=' * 40 + ('start step: %d' % step_num) +
                             '=' * 40)
            # step_num += self.iter_gap
            step_num += 1
            performance_result, early_stops = self.run_in_parallel(
                T, self.iter_gap, extra_info)
            result = []
            for i, item in enumerate(performance_result):
                result.append({
                    'loss': item['loss'],
                    'worker_id': i,
                    'ref_id': item['ref_id']
                })
            T, extra_info = self.hp_iterate(T, result)
            self.logger.info(
                'p update: ' +
                str([item.get_dictionary().values() for item in T]))

        result_sorted = sorted(result, key=lambda x: x['loss'])
        self.incumbent_configs.append(T[result_sorted[0]['worker_id']])
        self.incumbent_obj.append(result_sorted[0]['loss'])
Example #5
def mini_smac(learn_delta):
    sample_num_m = s_mid
    sample_num_l = s_min
    if not learn_delta:
        sample_num_m = s_min

    start_time = time.time()
    config_space = create_configspace()
    types, bounds = get_types(config_space)
    num_hp = len(bounds)
    surrogate = RandomForestWithInstances(types=types, bounds=bounds)
    acquisition_func = EI(model=surrogate)
    acq_optimizer = RandomSampling(acquisition_func, config_space, n_samples=max(500, 50 * num_hp))
    X = []
    y = []
    y_delta = []
    c = []
    inc_y = 1.

    # Initial design: evaluate num_init randomly sampled configurations.
    init_configs = sample_configurations(config_space, num_init)
    for config in init_configs:
        perf_t, _ = objective_function((config.get_dictionary(), sample_num_m))
        X.append(config)
        y.append(perf_t)
        if perf_t < inc_y:
            inc_y = perf_t
        c.append([time.time() - start_time, inc_y])
        if learn_delta:
            perf_l, _ = objective_function((config.get_dictionary(), sample_num_l))
            y_delta.append(perf_t - perf_l)
        else:
            y_delta.append(perf_t)

    # BO iterations.
    for _ in range(num_iter - num_init):
        # Update the surrogate model.
        surrogate.train(convert_configurations_to_array(X), np.array(y, dtype=np.float64))

        # Use EI acq to choose next config.
        incumbent = dict()
        best_index = np.argmin(y)
        incumbent['obj'] = y[best_index]
        incumbent['config'] = X[best_index]
        acquisition_func.update(model=surrogate, eta=incumbent)
        next_config = acq_optimizer.maximize(batch_size=1)[0]
        perf_t, _ = objective_function((next_config.get_dictionary(), sample_num_m))
        X.append(next_config)
        y.append(perf_t)
        if perf_t < inc_y:
            inc_y = perf_t
        c.append([time.time() - start_time, inc_y])
        if learn_delta:
            # Evaluate the newly chosen configuration at the lower fidelity to learn the delta.
            perf_l, _ = objective_function((next_config.get_dictionary(), sample_num_l))
            y_delta.append(perf_t - perf_l)
        else:
            y_delta.append(perf_t)

    return [convert_configurations_to_array(X), np.array(y_delta, dtype=np.float64)]
Example #6
File: mfse.py  Project: Dee-Why/hp-tuner
    def choose_next_batch(self, num_config):
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            configs = [self.config_space.sample_configuration()]
            configs.extend(
                sample_configurations(self.config_space, num_config - 1))
            self.configs.extend(configs)
            return configs

        config_candidates = list()
        acq_configs = self.get_bo_candidates(num_configs=2 * num_config)
        acq_idx = 0
        for idx in range(1, 1 + 2 * num_config):
            # Like BOHB, sample a fixed percentage of random configurations.
            if self.random_configuration_chooser.check(idx):
                _config = self.config_space.sample_configuration()
            else:
                _config = acq_configs[acq_idx]
                acq_idx += 1
            if _config not in config_candidates:
                config_candidates.append(_config)
            if len(config_candidates) >= num_config:
                break

        if len(config_candidates) < num_config:
            config_candidates = expand_configurations(config_candidates,
                                                      self.config_space,
                                                      num_config)

        _config_candidates = []
        for config in config_candidates:
            if config not in self.configs:  # Check if evaluated
                _config_candidates.append(config)
        self.configs.extend(_config_candidates)
        return _config_candidates
Example #7
    def choose_next(self, num_config):
        if len(self.incumbent_obj) < 2 * self.num_config:
            return sample_configurations(self.config_space, num_config)

        # print('choose next starts!')
        self.logger.info('train feature is: %s' % str(self.incumbent_configs[-5:]))
        self.logger.info('train target is: %s' % str(self.incumbent_obj))

        self.surrogate.train(convert_configurations_to_array(self.incumbent_configs),
                             np.array(self.incumbent_obj, dtype=np.float64))

        conf_cnt = 0
        total_cnt = 0
        next_configs = []
        while conf_cnt < num_config and total_cnt < 5 * num_config:
            incumbent = dict()
            best_index = np.argmin(self.incumbent_obj)
            incumbent['obj'] = self.incumbent_obj[best_index]
            incumbent['config'] = self.incumbent_configs[best_index]

            self.acquisition_func.update(model=self.surrogate, eta=incumbent)
            rand_config = self.acq_optimizer.maximize(batch_size=1)[0]
            if rand_config not in next_configs:
                next_configs.append(rand_config)
                conf_cnt += 1
            total_cnt += 1
        if conf_cnt < num_config:
            next_configs = expand_configurations(next_configs, self.config_space, num_config)
        return next_configs
Example #8
    def iterate(self, skip_last=0):
        for s in reversed(range(self.s_max + 1)):
            # Initial number of configurations
            n = int(ceil(self.B / self.max_iter / (s + 1) * self.eta**s))
            # Initial number of iterations per config
            r = self.max_iter * self.eta**(-s)

            # Sample n configurations uniformly.
            T = sample_configurations(self.configuration_space, n)
            incumbent_loss = np.inf
            extra_info = None
            last_run_num = None
            for i in range((s + 1) - int(skip_last)):  # Changed from s + 1
                # Run each of the n configs for <iterations>
                # and keep best (n_configs / eta) configurations.

                n_configs = n * self.eta**(-i)
                n_iterations = r * self.eta**(i)
                n_iter = n_iterations
                if last_run_num is not None and not self.restart_needed:
                    n_iter -= last_run_num
                last_run_num = n_iterations

                self.logger.info("HB: %d configurations x %d iterations each" %
                                 (int(n_configs), int(n_iterations)))

                ret_val, early_stops = self.run_in_parallel(
                    T, n_iter, extra_info)
                val_losses = [item['loss'] for item in ret_val]
                ref_list = [item['ref_id'] for item in ret_val]

                # select a number of best configurations for the next loop
                # filter out early stops, if any
                indices = np.argsort(val_losses)
                if len(T) == sum(early_stops):
                    break
                if len(T) >= self.eta:
                    T = [T[i] for i in indices if not early_stops[i]]
                    extra_info = [
                        ref_list[i] for i in indices if not early_stops[i]
                    ]
                    reduced_num = int(n_configs / self.eta)
                    T = T[0:reduced_num]
                    extra_info = extra_info[0:reduced_num]
                else:
                    T = [T[indices[0]]]
                    extra_info = [ref_list[indices[0]]]
                incumbent_loss = val_losses[indices[0]]
                self.add_stage_history(
                    self.stage_id, min(self.global_incumbent, incumbent_loss))
                self.stage_id += 1
            if not np.isnan(incumbent_loss):
                self.incumbent_configs.append(T[0])
                self.incumbent_perfs.append(incumbent_loss)
            self.remove_immediate_model()
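For orientation, the bracket schedule that the nested loops in this Hyperband iterate() walk through can be reproduced standalone. A small illustrative sketch follows; max_iter = 81 and eta = 3 are assumed values chosen for the example, not taken from the snippet.

from math import ceil, log

# Print the (n_configs, n_iterations) pair of every successive-halving stage in
# each Hyperband bracket, mirroring the n_configs / n_iterations formulas above.
max_iter, eta = 81, 3
s_max = int(log(max_iter) / log(eta))
B = (s_max + 1) * max_iter
for s in reversed(range(s_max + 1)):
    n = int(ceil(B / max_iter / (s + 1) * eta ** s))
    r = max_iter * eta ** (-s)
    schedule = [(int(n * eta ** (-i)), int(r * eta ** i)) for i in range(s + 1)]
    print('bracket s=%d: %s' % (s, schedule))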
Example #9
    def choose_next(self, num_config):
        if len(self.incumbent_obj) < 3:
            return sample_configurations(self.config_space, num_config)

        self.logger.info('BO Training - X: %s' %
                         str(self.incumbent_configs[-5:]))
        self.logger.info('BO Training - Y: %s' % str(self.incumbent_obj))
        self.surrogate.train(
            convert_configurations_to_array(self.incumbent_configs),
            np.array(self.incumbent_obj, dtype=np.float64))

        conf_cnt = 0
        total_cnt = 0
        _next_configs = []
        while conf_cnt < num_config and total_cnt < 5 * num_config:
            incumbent = dict()
            best_index = np.argmin(self.incumbent_obj)
            incumbent['obj'] = self.incumbent_obj[best_index]
            incumbent['config'] = self.incumbent_configs[best_index]

            self.acquisition_func.update(model=self.surrogate, eta=incumbent)
            rand_config = self.acq_optimizer.maximize(batch_size=1)[0]
            if rand_config not in _next_configs:
                _next_configs.append(rand_config)
                conf_cnt += 1
            total_cnt += 1
        if conf_cnt < num_config:
            _next_configs = expand_configurations(_next_configs,
                                                  self.config_space,
                                                  num_config)

        next_configs = []

        # Epsilon greedy
        for config in _next_configs:
            if random.random() < self.p:
                next_configs.append(
                    sample_configurations(self.config_space, 1)[0])
            else:
                next_configs.append(config)

        return next_configs
Example #10
    def iterate(self):
        configs = sample_configurations(self.config_space, self.num_workers)
        extra_info = None
        ret_val, early_stops = self.run_in_parallel(configs, self.R,
                                                    extra_info)
        val_losses = [item['loss'] for item in ret_val]

        self.incumbent_configs.extend(configs)
        self.incumbent_obj.extend(val_losses)
        self.add_stage_history(self.stage_id, self.global_incumbent)
        self.stage_id += 1
        self.remove_immediate_model()
Example #11
    def choose_next(self, num_config, r, mode):
        # different types of mode.
        if mode == 'Hybrid':
            mode = 'Backward' if self.iterate_id % 2 == 0 else 'Forward'

        if mode == 'Forward':
            if r != self.R:
                r *= self.eta
        elif mode == 'Backward':
            if r != 1:
                r /= self.eta
            else:
                r = self.R

        # TODO: with different modes, this condition may not be needed any more.
        n_exp = len(self.target_y[r])
        if n_exp < 2 * self.num_config:
            return sample_configurations(self.config_space, num_config)

        self.logger.info('train feature is: %s' % str(self.target_x[r]))
        self.logger.info('train target is: %s' % str(self.target_y[r]))

        self.surrogate.train(convert_configurations_to_array(self.target_x[r]),
                             np.array(self.target_y[r], dtype=np.float64))

        conf_cnt = 0
        next_configs = []
        total_cnt = 0
        # TODO: acceleration, maximize a batch of candidates.
        while conf_cnt < num_config and total_cnt < 5 * num_config:
            rand_config = None
            if random.uniform(0, 1) < self.init_tradeoff:
                rand_config = self.config_space.sample_configuration(1)
            else:
                # print('use surrogate to produce candidate.')
                incumbent = dict()
                incumbent['obj'] = np.min(self.target_y[r])
                incumbent['config'] = self.target_x[r][np.argmin(
                    self.target_y[r])]

                self.acquisition_func.update(model=self.surrogate,
                                             eta=incumbent)
                rand_config = self.acq_optimizer.maximize(batch_size=1)[0]
            if rand_config not in next_configs:
                next_configs.append(rand_config)
                conf_cnt += 1
            total_cnt += 1

        if conf_cnt < num_config:
            next_configs = expand_configurations(next_configs,
                                                 self.config_space, num_config)

        return next_configs
Example #12
File: pbt.py  Project: Dee-Why/hp-tuner
    def get_neighbour_hp(self, T, worker_id):
        neighbours = get_random_neighborhood(T[worker_id],
                                             10 * self.n_population,
                                             self.rand_int)
        for item in neighbours:
            if item not in T:
                return item
        # for _ in range(self.n_population):
        #     hp = random.choice(neighbours)
        #     if hp not in T:
        #         return hp
        return sample_configurations(self.config_space, 1)[0]
Example #13
File: pbt.py  Project: Dee-Why/hp-tuner
    def iterate(self):
        # sample n_population configurations.
        T = sample_configurations(self.config_space, self.n_population)
        step_num = 0
        result = []
        while step_num < self.iter_steps:
            step_num += self.iter_gap
            result = []
            for worker in self.workers:
                res = worker.step(step_num, T[worker.worker_id])
                result.append(res)
            T = self.hp_iterate(T, result)

        result_sorted = sorted(result, key=lambda x: x['loss'])
        self.incumbent_configs.append(T[result_sorted[0]['worker_id']])
        self.incumbent_obj.append(result_sorted[0]['loss'])
Example #14
    def choose_next(self, num_config):
        if len(self.incumbent_obj) < 3:
            return sample_configurations(self.config_space, num_config)

        self.logger.info('Train feature is: %s' %
                         str(self.incumbent_configs[:5]))
        self.logger.info('Train target is: %s' % str(self.incumbent_obj))
        self.surrogate.train(
            convert_configurations_to_array(self.incumbent_configs),
            np.array(self.incumbent_obj, dtype=np.float64))

        config_cnt = 0
        total_sample_cnt = 0
        config_candidates = []
        while config_cnt < num_config and total_sample_cnt < 3 * num_config:
            if random.random() < self.p:
                rand_config = self.config_space.sample_configuration(1)
            else:
                # print('use surrogate to produce candidate.')
                incumbent = dict()
                best_index = np.argmin(self.incumbent_obj)
                incumbent['obj'] = self.incumbent_obj[best_index]
                incumbent['config'] = self.incumbent_configs[best_index]

                self.acquisition_func.update(model=self.surrogate,
                                             eta=incumbent)
                rand_config = self.acq_optimizer.maximize(batch_size=1)[0]
            if rand_config not in config_candidates:
                config_candidates.append(rand_config)
                config_cnt += 1
            total_sample_cnt += 1
        if config_cnt < num_config:
            config_candidates = expand_configurations(config_candidates,
                                                      self.config_space,
                                                      num_config)
        return config_candidates
Example #15
def tse(run_id, train_base_models=True):
    start_time = time.time()

    from concurrent.futures import ProcessPoolExecutor
    pool = ProcessPoolExecutor(max_workers=args.worker)
    X, y = [], []
    c = []
    inc = 1.
    X_l, y_l = [], []

    weight = np.array([1/K]*(K+1))
    config_evaluated = []
    config_space = create_configspace()
    # Initialize config L.
    config_L = sample_configurations(config_space, num_L_init)

    if train_base_models:
        func_configs = list()
        for iter_t in range(K):
            print('Build mid fidelity model', iter_t)
            func_configs.append(True)
        func_configs.append(False)
        training_data = run_parallel_async(pool, mini_smac, func_configs)
        with open('data/xgb/base_tse_data_%d.pkl' % run_id, 'wb') as f:
            pickle.dump(training_data, f)
    else:
        with open('data/xgb/base_tse_data_%d.pkl' % 10, 'rb') as f:
            training_data = pickle.load(f)
        print('Loaded training data for M evaluations!')

    # Create base models.
    base_models = list()
    config_space = create_configspace()
    types, bounds = get_types(config_space)
    for iter_t in range(K+1):
        config_x, config_y = training_data[iter_t]
        model = RandomForestWithInstances(types=types, bounds=bounds)
        model.train(config_x, config_y)
        base_models.append(model)
    low_fidelity_model = base_models[K]
    X_l.extend(training_data[K][0].tolist())
    y_l.extend(training_data[K][1].tolist())
    print('Base model building finished!')

    # The framework of TSE.
    for iter_t in range(iter_H):
        print('Iteration in TSE', iter_t)
        # Sample a batch of configurations according to the TSE model.
        configs = sample_configurations(config_space, iter_L * 10)
        config_arrays = convert_configurations_to_array(configs)
        perfs, _ = low_fidelity_model.predict(config_arrays)
        perfs = perfs[:, 0]
        if len(y) > 3:
            preds = []
            for i in range(K):
                m, _ = base_models[i].predict(config_arrays)
                preds.append(m[:, 0].tolist())
            preds = np.array(preds).T
            preds = np.mat(np.hstack((preds, np.ones((len(configs), 1)))))
            # Add the delta.
            delta = preds*np.mat(weight.reshape(-1, 1))
            perfs += delta.getA()[:, 0]
        configs_candidate = []
        indexes = np.argsort(perfs)[:iter_L]
        for index in indexes:
            configs_candidate.append(configs[index])

        # Evaluate the low-fidelity configurations.
        print('='*10 + 'Evaluating the low-fidelity configurations')
        config_params = []
        for config in configs_candidate:
            config_params.append((config.get_dictionary(), s_min))

        result_perf = run_parallel_async(pool, objective_function, config_params)

        for index, item in enumerate(result_perf):
            X_l.append(configs_candidate[index].get_array().tolist())
            y_l.append(item[0])

        print(np.array(X_l).shape, np.array(y_l, dtype=np.float64).shape)
        # Update f_L.
        print('=' * 10 + 'Retrain the f_L')
        low_fidelity_model.train(np.array(X_l), np.array(y_l, dtype=np.float64))
        config_L.extend(configs_candidate)

        configs_input = []
        for config in config_L:
            if config not in config_evaluated:
                configs_input.append(config)

        # Choose the next configuration.
        config_arrays = convert_configurations_to_array(configs_input)
        perfs, _ = low_fidelity_model.predict(config_arrays)
        perfs = perfs[:, 0]
        if len(y) > 3:
            preds = []
            for i in range(K):
                m, _ = base_models[i].predict(config_arrays)
                preds.append(m[:, 0].tolist())
            preds = np.array(preds).T
            preds = np.mat(np.hstack((preds, np.ones((len(configs_input), 1)))))
            # Add the delta.
            delta = preds * np.mat(weight.reshape(-1, 1))
            perfs += delta.getA()[:, 0]
        next_config = configs_input[np.argmin(perfs)]

        # Evaluate this config with a high-fidelity setting.
        print('=' * 10 + 'Evaluate the high-fidelity configuration')
        perf, _ = objective_function((next_config.get_dictionary(), s_max))
        X.append(next_config)
        y.append(perf)
        if perf < inc:
            inc = perf
        c.append([time.time()-start_time, inc])
        print('Current inc', inc)

        if len(y) < 3:
            continue
        # Learn the weight in TSE.
        Z = []
        for i in range(K):
            m, v = base_models[i].predict(convert_configurations_to_array(X))
            Z.append(m[:, 0].tolist())
        Z = np.mat(np.hstack((np.array(Z).T, np.ones((len(y), 1)))))
        f = np.mat(np.array(y).reshape((-1, 1)))
        # Compute the weight.
        try:
            ZtZ_inv = np.linalg.inv(Z.T * Z)
            weight = (ZtZ_inv * Z.T * f)[:, 0]
            print('The weight updated is', weight)
        except np.linalg.LinAlgError as err:
            if 'Singular matrix' in str(err):
                print('Singular matrix encountered; the weight is not updated!')
            else:
                raise ValueError('Unexpected error!')

        # Save the result.
        np.save('data/xgb/tse_%d.npy' % run_id, np.array(c))
        plt.plot(np.array(c)[:, 0], np.array(c)[:, 1])
        plt.xlabel('time_elapsed (s)')
        plt.ylabel('validation error')
        plt.savefig("data/xgb/tse_%d.png" % run_id)
        if time.time() - start_time > 21600:
            raise ValueError('Runtime budget exhausted!')

    pool.shutdown(wait=True)
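The weight update inside the try/except block of tse() is an ordinary least-squares fit of the observed high-fidelity losses against the base-model predictions plus a bias column. Below is a minimal equivalent sketch using numpy's lstsq; it is an alternative formulation (lstsq handles a rank-deficient Z without raising LinAlgError), not the project's code.

import numpy as np


def update_weight(Z, f):
    # Solve min_w ||Z w - f||^2, where Z stacks one column of predictions per
    # base model plus a final column of ones, and f holds the observed losses.
    w, *_ = np.linalg.lstsq(np.asarray(Z), np.asarray(f).reshape(-1), rcond=None)
    return w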