def test_serial_warm_start_multi_fidelity(self):
    """Warm-start with AsyncComm + HyperBand: observations at the smallest
    budget must accumulate across successive resumed runs."""
    iters = 15
    workers = 3
    strategy = "AsyncComm"
    iter_gen = HyperBandIterGenerator(25, 100, 2)
    for warm_start_strategy in valid_warm_start_strategies:
        previous = fmin(
            evaluation,
            config_space,
            optimizer="ETPE",
            n_jobs=workers,
            n_iterations=iters,
            parallel_strategy=strategy,
            multi_fidelity_iter_generator=iter_gen)
        print(previous)
        for round_ix in range(3):
            current = fmin(
                evaluation,
                config_space,
                warm_start_strategy=warm_start_strategy,
                n_jobs=workers,
                n_iterations=iters,
                previous_result=previous,
                parallel_strategy=strategy,
                multi_fidelity_iter_generator=iter_gen)
            previous = current
            print(previous)
            # each run appears to add 20 observations at budget 25 — TODO confirm
            # this count against the HyperBand(25, 100, 2) schedule
            assert len(current["budget2obvs"][25]["losses"]) == 20 * (round_ix + 2)
def test_warm_start_mapreduce(self):
    """Warm-start under the MapReduce strategy: each resumed run must append
    exactly n_iterations losses at budget 1."""
    # FIXME: resume strategy occur `runId not in runId2info`
    n_iterations = 5
    for warm_start_strategy in valid_warm_start_strategies:
        print(warm_start_strategy)
        previous = fmin(
            evaluate,
            config_space,
            optimizer="ETPE",
            n_jobs=3,
            n_iterations=n_iterations,
            parallel_strategy="MapReduce")
        for round_ix in range(3):
            current = fmin(
                evaluate,
                config_space,
                warm_start_strategy=warm_start_strategy,
                n_jobs=3,
                n_iterations=n_iterations,
                previous_result=previous,
                parallel_strategy="MapReduce")
            previous = current
            # after k resumed runs there are (k + 1) * n_iterations observations
            assert len(current["budget2obvs"][1]["losses"]) == n_iterations * (round_ix + 2)
def ultraopt_cube_factory(objective, n_trials, n_dim, with_count, method):
    """Minimize `objective` over the unit cube [0, 1]^n_dim with ultraopt.

    Parameters
    ----------
    objective : callable taking a list of n_dim floats, returning a float loss
    n_trials : number of optimizer iterations
    n_dim : dimensionality of the search cube
    with_count : when true, also return the number of objective evaluations
    method : ultraopt optimizer name/instance passed through to `fmin`

    Returns
    -------
    (best_val, best_x) or (best_val, best_x, feval_count) when with_count.
    """
    # module-level counter so the wrapped objective can be tallied
    global feval_count
    feval_count = 0
    # one uniform [0, 1] hyperparameter per dimension (idiomatic dict
    # comprehension instead of dict([...]) over a list comprehension)
    HDL = {
        f"u{i}": {"_type": "uniform", "_value": [0., 1.]}
        for i in range(n_dim)
    }

    def _objective(config: dict) -> float:
        # count every evaluation, then rebuild the point as an ordered list
        global feval_count
        feval_count += 1
        u = [config[f"u{i}"] for i in range(n_dim)]
        return objective(u)

    result = fmin(eval_func=_objective,
                  config_space=HDL,
                  optimizer=method,
                  n_iterations=n_trials,
                  n_jobs=1,
                  show_progressbar=False,
                  parallel_strategy="Serial")
    best_x = [result.best_config[f"u{i}"] for i in range(n_dim)]
    best_val = result.best_loss
    return (best_val, best_x, feval_count) if with_count else (best_val, best_x)
def test_serial_warm_start_serial(self):
    """Serial (single-job) warm-start: losses at budget 1 must grow by
    n_iterations with every resumed run."""
    n_iterations = 15
    for warm_start_strategy in valid_warm_start_strategies:
        previous = fmin(
            evaluation,
            config_space,
            optimizer="ETPE",
            n_jobs=1,
            n_iterations=n_iterations,
        )
        for round_ix in range(3):
            current = fmin(
                evaluation,
                config_space,
                warm_start_strategy=warm_start_strategy,
                n_jobs=1,
                n_iterations=n_iterations,
                previous_result=previous)
            previous = current
            # (round_ix + 2) total runs have completed at this point
            assert len(current["budget2obvs"][1]["losses"]) == n_iterations * (round_ix + 2)
def main(optimizer):
    """Benchmark `optimizer` on every synthetic function and dump the
    aggregated convergence statistics to ultraopt_<optimizer>.json."""
    for synthetic_function_cls in synthetic_functions:
        meta_info = synthetic_function_cls.get_meta_information()
        # some benchmarks prescribe their own evaluation budget
        max_iter = meta_info.get("num_function_evals", base_max_iter)
        # build a continuous hyperparameter space from the benchmark bounds
        config_space = ConfigurationSpace()
        config_space.generate_all_continuous_from_bounds(
            synthetic_function_cls.get_meta_information()['bounds'])
        synthetic_function = synthetic_function_cls()

        # objective: loss shifted so the global optimum maps to 0
        def evaluation(config: dict):
            config = Configuration(config_space, values=config)
            return synthetic_function.objective_function(config)["function_value"] - \
                   synthetic_function.get_meta_information()["f_opt"]

        res = pd.DataFrame(columns=[f"trial-{i}" for i in range(repetitions)],
                           index=range(max_iter))
        print(meta_info["name"])
        for trial in range(repetitions):
            # distinct but reproducible seed per trial
            random_state = base_random_state + trial * 10
            ret = fmin(evaluation, config_space,
                       optimizer=optimizer,
                       random_state=random_state,
                       n_iterations=max_iter)
            print(ret)
            losses = ret["budget2obvs"][1]["losses"]
            print(ret["best_loss"])
            res[f"trial-{trial}"] = losses
        # convert raw losses to a running minimum before aggregating
        res = raw2min(res)
        name = synthetic_function.get_meta_information()["name"]
        final_result[name] = {
            "mean": res.mean(1).tolist(),
            "std": res.std(1).tolist(),
            "q25": res.quantile(0.25, 1).tolist(),
            "q75": res.quantile(0.75, 1).tolist(),
            "q90": res.quantile(0.90, 1).tolist()
        }
        # re-save after every benchmark so partial results survive interruption
        Path(f"ultraopt_{optimizer}.json").write_text(json.dumps(final_result))
def test(self):
    """Checkpointing: a run configured with checkpoint_file must leave a
    checkpoint on disk that equals the returned result."""
    for strategy in valid_parallel_strategies:
        print(strategy)
        result = fmin(
            evaluation,
            config_space,
            optimizer="ETPE",
            n_jobs=4,
            n_iterations=11,
            parallel_strategy=strategy,
            checkpoint_file="checkpoint.pkl",
            checkpoint_freq=9,
            multi_fidelity_iter_generator=CustomIterGenerator([4, 2, 1],
                                                              [25, 50, 100]))
        restored = load("checkpoint.pkl")
        # the persisted state must match what fmin returned
        assert result.budget2info == restored.budget2info
        print(restored)
def main():
    """Benchmark ETPE (gamma2=0.95) on every synthetic function and dump
    mean/std convergence curves to ultraopt.json."""
    for synthetic_function_cls in synthetic_functions:
        meta_info = synthetic_function_cls.get_meta_information()
        # some benchmarks prescribe their own evaluation budget
        max_iter = meta_info.get("num_function_evals", base_max_iter)
        # build a continuous hyperparameter space from the benchmark bounds
        config_space = ConfigurationSpace()
        config_space.generate_all_continuous_from_bounds(
            synthetic_function_cls.get_meta_information()['bounds'])
        synthetic_function = synthetic_function_cls()

        # objective: loss shifted so the global optimum maps to 0
        def evaluation(config: dict):
            config = Configuration(config_space, values=config)
            return synthetic_function.objective_function(config)["function_value"] - \
                   synthetic_function.get_meta_information()["f_opt"]

        # NOTE (translated): cleanup of experiment_param happens after saving
        res = pd.DataFrame(columns=[f"trial-{i}" for i in range(repetitions)],
                           index=range(max_iter))
        print(meta_info["name"])
        for trial in range(repetitions):
            # distinct but reproducible seed per trial
            random_state = base_random_state + trial * 10
            ret = fmin(evaluation, config_space,
                       optimizer=ETPEOptimizer(gamma2=0.95),
                       random_state=random_state,
                       n_iterations=max_iter)
            print(ret)
            losses = ret["budget2obvs"][1]["losses"]
            print(ret["best_loss"])
            res[f"trial-{trial}"] = losses
        # convert raw losses to a running minimum before aggregating
        res = raw2min(res)
        name = synthetic_function.get_meta_information()["name"]
        final_result[name] = {"mean": res.mean(1).tolist(),
                              "std": res.std(1).tolist()}
        # re-save after every benchmark so partial results survive interruption
        Path("ultraopt.json").write_text(json.dumps(final_result))
def test_all_methods(self):
    """Smoke-test every optimizer x parallel-strategy combination; AsyncComm
    additionally gets both multi-fidelity iteration generators."""
    for optimizer in valid_optimizers:
        for strategy in valid_parallel_strategies:
            # only AsyncComm supports multi-fidelity generators here
            if strategy == "AsyncComm":
                generators = [
                    HyperBandIterGenerator(50, 100, 2),
                    SuccessiveHalvingIterGenerator(50, 100, 2),
                ]
            else:
                generators = [None]
            for generator in generators:
                print(optimizer, strategy, generator)
                ret = fmin(evaluate,
                           config_space,
                           optimizer=optimizer,
                           n_iterations=2,
                           n_jobs=2,
                           parallel_strategy=strategy,
                           multi_fidelity_iter_generator=generator)
                print(ret)
def main():
    """Run `repetitions` seeded trials of ETPE (gamma1=0.95), persist the
    per-trial loss curves to CSV, and print the mean best loss."""
    res = pd.DataFrame(columns=[f"trial-{i}" for i in range(repetitions)],
                       index=range(max_iter))
    for trial in range(repetitions):
        random_state = 50 + trial * 10
        # seed the config-space RNG (affects all subsequent sampling)
        config_space.seed(random_state)
        print("==========================")
        print(f"= Trial -{trial:01d}- =")
        print("==========================")
        ret = fmin(evaluation, config_space,
                   optimizer=ETPEOptimizer(gamma1=0.95),
                   random_state=random_state,
                   n_iterations=max_iter)
        print(ret)
        losses = ret["budget2obvs"][1]["losses"]
        print(ret["best_loss"])
        res[f"trial-{trial}"] = losses
    res.to_csv(f"{experiment}_7.csv", index=False)
    # mean over trials of each trial's minimum loss
    print(res.min()[:repetitions].mean())
def test_multi_rest_config_space(self):
    """Build a config space from a deeply nested HDL with `(choice)` nodes at
    several levels and smoke-test the ETPE optimizer on it."""
    HDL = {
        # top-level choice between feature-engineering pipelines
        "feature_engineer(choice)": {
            "feature_selection(choice)": {
                # wrapper-style selection: choose one estimator
                "wrapper(choice)": {
                    "RandomForest": {
                        "n_iterations": {
                            "_type": "int_quniform",
                            "_value": [10, 100, 10]
                        },
                        "max_depth": {
                            "_type": "int_quniform",
                            "_value": [3, 7, 2]
                        },
                    },
                    "LinearRegression": {
                        "C": {
                            "_type": "loguniform",
                            "_value": [0.01, 10000],
                            "_default": 1.0
                        },
                    },
                },
                # filter-style selection: choose a scoring function
                "filter": {
                    "score_func": {
                        "_type": "choice",
                        "_value": ["pearsonr", "spearmanr"]
                    }
                }
            },
            "PolynomialFeatures": {
                "degree": {
                    "_type": "int_uniform",
                    "_value": [2, 3]
                },
                "interaction_only": {
                    "_type": "choice",
                    "_value": [True, False]
                },
            },
            "decomposition(choice)": {
                "PCA": {
                    "n_components": {
                        "_type": "uniform",
                        "_value": [0.8, 0.95]
                    },
                    "whiten": {
                        "_type": "choice",
                        "_value": [True, False]
                    },
                },
                "KernelPCA": {
                    "n_components": {
                        "_type": "uniform",
                        "_value": [0.8, 0.95]
                    },
                    "whiten": {
                        "_type": "choice",
                        "_value": [True, False]
                    },
                },
                # a branch with no tunable hyperparameters
                "ICA": {}
            }
        }
    }
    config_space = hdl2cs(HDL)
    # smoke test: just check that sampling/optimization runs without error
    fmin(evaluate, config_space, "ETPE", n_iterations=30)
def test_conditions_and_fobidden(self):
    """Exercise HDL `__activate` (conditional hyperparameters) and
    `__forbidden` (forbidden parameter combinations), separately and — via
    the "mock" branch — together in one model."""
    HDL = {
        "model(choice)": {
            # forbidden clauses only
            "linearsvc": {
                "max_iter": {
                    "_type": "int_quniform",
                    "_value": [300, 3000, 100],
                    "_default": 600
                },
                "penalty": {
                    "_type": "choice",
                    "_value": ["l1", "l2"],
                    "_default": "l2"
                },
                "dual": {
                    "_type": "choice",
                    "_value": [True, False],
                    "_default": False
                },
                "loss": {
                    "_type": "choice",
                    "_value": ["hinge", "squared_hinge"],
                    "_default": "squared_hinge"
                },
                "C": {
                    "_type": "loguniform",
                    "_value": [0.01, 10000],
                    "_default": 1.0
                },
                # combinations that must never be sampled
                "__forbidden": [
                    {
                        "penalty": "l1",
                        "loss": "hinge"
                    },
                    {
                        "penalty": "l2",
                        "dual": False,
                        "loss": "hinge"
                    },
                    {
                        "penalty": "l1",
                        "dual": False
                    },
                    {
                        "penalty": "l1",
                        "dual": True,
                        "loss": "squared_hinge"
                    },
                ]
            },
            # conditional (activate) clauses only
            "svc": {
                "C": {
                    "_type": "loguniform",
                    "_value": [0.01, 10000],
                    "_default": 1.0
                },
                "kernel": {
                    "_type": "choice",
                    "_value": ["rbf", "poly", "sigmoid"],
                    "_default": "rbf"
                },
                "degree": {
                    "_type": "int_uniform",
                    "_value": [2, 5],
                    "_default": 3
                },
                "gamma": {
                    "_type": "loguniform",
                    "_value": [1e-05, 8],
                    "_default": 0.1
                },
                "coef0": {
                    "_type": "quniform",
                    "_value": [-1, 1],
                    "_default": 0
                },
                "shrinking": {
                    "_type": "choice",
                    "_value": [True, False],
                    "_default": True
                },
                # hyperparameters only active for the chosen kernel value
                "__activate": {
                    "kernel": {
                        "rbf": ["gamma"],
                        "sigmoid": ["gamma", "coef0"],
                        "poly": ["degree", "gamma", "coef0"]
                    }
                }
            },
            # both __activate and __forbidden in the same model
            "mock": {
                "C": {
                    "_type": "loguniform",
                    "_value": [0.01, 10000],
                    "_default": 1.0
                },
                "kernel": {
                    "_type": "choice",
                    "_value": ["rbf", "poly", "sigmoid"],
                    "_default": "rbf"
                },
                "degree": {
                    "_type": "int_uniform",
                    "_value": [2, 5],
                    "_default": 3
                },
                "gamma": {
                    "_type": "loguniform",
                    "_value": [1e-05, 8],
                    "_default": 0.1
                },
                "coef0": {
                    "_type": "quniform",
                    "_value": [-1, 1],
                    "_default": 0
                },
                "shrinking": {
                    "_type": "choice",
                    "_value": [True, False],
                    "_default": True
                },
                "__activate": {
                    "kernel": {
                        "rbf": ["gamma"],
                        "sigmoid": ["gamma", "coef0"],
                        "poly": ["degree", "gamma", "coef0"]
                    }
                },
                "max_iter": {
                    "_type": "int_quniform",
                    "_value": [300, 3000, 100],
                    "_default": 600
                },
                "penalty": {
                    "_type": "choice",
                    "_value": ["l1", "l2"],
                    "_default": "l2"
                },
                "dual": {
                    "_type": "choice",
                    "_value": [True, False],
                    "_default": False
                },
                "loss": {
                    "_type": "choice",
                    "_value": ["hinge", "squared_hinge"],
                    "_default": "squared_hinge"
                },
                "__forbidden": [
                    {
                        "penalty": "l1",
                        "loss": "hinge"
                    },
                    {
                        "penalty": "l2",
                        "dual": False,
                        "loss": "hinge"
                    },
                    {
                        "penalty": "l1",
                        "dual": False
                    },
                    {
                        "penalty": "l1",
                        "dual": True,
                        "loss": "squared_hinge"
                    },
                ]
            },
        }
    }
    config_space = hdl2cs(HDL)
    # smoke test: optimization must respect conditions and forbidden clauses
    fmin(evaluate, config_space, "ETPE", n_iterations=30)
CS = hdl2cs(HDL)
g = plot_hdl(HDL)
default_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)
X, y = load_digits(return_X_y=True)


class Evaluator():
    """Callable objective: returns 1 - mean cross-validated score of the
    classifier described by the sampled config."""

    def __init__(self, X, y, metric="accuracy", cv=default_cv):
        # store the dataset and evaluation settings
        self.X = X
        self.y = y
        self.metric = metric
        self.cv = cv

    def __call__(self, config: dict) -> float:
        # The layered config holds exactly one classifier entry:
        # estimator-name -> hyperparameter dict.
        classifier_entry = layering_config(config)['classifier'].copy()
        clf_name, hyperparams = classifier_entry.popitem()
        # resolve the estimator class by name (demo code; eval assumes the
        # class is already imported in this module)
        model = eval(clf_name)(**hyperparams)
        cv_scores = cross_val_score(model, self.X, self.y,
                                    cv=self.cv, scoring=self.metric)
        # fmin minimizes, so flip the score into a loss
        return 1 - cv_scores.mean()


evaluator = Evaluator(X, y)
result = fmin(evaluator, HDL, optimizer="ETPE", n_iterations=40)
print(result)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author : qichun tang
# @Date : 2020-12-27
# @Contact : [email protected]
"""Demo: run a HyperBand-driven optimization and render each diagnostic plot."""
from ultraopt import fmin
from ultraopt.multi_fidelity import HyperBandIterGenerator
from ultraopt.tests.mock import evaluate, config_space
import pylab as plt

result = fmin(evaluate, config_space, n_jobs=3,
              multi_fidelity_iter_generator=HyperBandIterGenerator(25, 100, 2))
# convergence curve on a log-scaled y axis
result.plot_convergence_over_time(yscale="log")
plt.show()
# number of concurrently running evaluations over time
result.plot_concurrent_over_time(num_points=100)
plt.show()
# cumulative count of finished evaluations
result.plot_finished_over_time()
plt.show()
# rank correlation of losses between neighbouring budgets
result.plot_correlation_across_budgets()
plt.show()
print()
# ------------------------------------------------------------- # 采用在对【 5折交叉验证中的训练集 】进行采样的方法,采样率为 budget sample_ratio = budget scores = [] for i, (train_ix, valid_ix) in enumerate(self.cv.split(X, y)): rng = np.random.RandomState(i) size = int(train_ix.size * sample_ratio) train_ix = rng.choice(train_ix, size, replace=False) X_train = X[train_ix, :] y_train = y[train_ix] X_valid = X[valid_ix, :] y_valid = y[valid_ix] ML_model.fit(X_train, y_train) y_pred = ML_model.predict(X_valid) score = eval(f"sklearn.metrics.{self.metric}_score")(y_valid, y_pred) scores.append(score) # ------------------------------------------------------------- score = np.mean(scores) return 1 - score evaluator = Evaluator(X, y) iter_generator = HyperBandIterGenerator(min_budget=1 / 4, max_budget=1, eta=2) result = fmin(evaluator, HDL, optimizer="ETPE", n_iterations=30, multi_fidelity_iter_generator=iter_generator, n_jobs=3) print(result)
evaluator = UltraoptEvaluator(data, 'balanced_accuracy') CS = hdl2cs(HDL) repetitions = int(sys.argv[2]) max_iter = int(sys.argv[3]) n_startup_trials = int(sys.argv[4]) print( f"repetitions={repetitions}, max_iter={max_iter}, n_startup_trials={n_startup_trials}" ) res = pd.DataFrame(columns=[f"trial-{i}" for i in range(repetitions)], index=range(max_iter)) for trial in range(repetitions): optimizer = ETPEOptimizer(min_points_in_model=n_startup_trials, ) ret = fmin( evaluator, HDL, optimizer, random_state=trial * 10, n_iterations=max_iter, ) losses = ret["budget2obvs"][1]["losses"] res[f"trial-{trial}"] = losses res = raw2min(res) m = res.mean(1) s = res.std(1) final_result = { "global_min": evaluator.global_min, "mean": m.tolist(), "std": s.tolist(), "q10": res.quantile(0.10, 1).tolist(), "q25": res.quantile(0.25, 1).tolist(), "q75": res.quantile(0.75, 1).tolist(),
def objective_function(config: dict, budget: int = 100):
    """Evaluate benchmark `b` at `config` with an integer budget.

    Returns the loss as a plain float (cost is ignored).
    """
    loss, _cost = b.objective_function(config, int(budget))
    return float(loss)


cs = b.get_configuration_space()
# BOHB / HyperBand are emulated by pairing a base optimizer with a
# HyperBand iteration generator; HB marks multi-fidelity runs.
HB = False
if args.optimizer in ("BOHB", "HyperBand"):
    optimizer = "ETPE" if args.optimizer == "BOHB" else "Random"
    iter_generator = HyperBandIterGenerator(min_budget=3, max_budget=100, eta=3)
    HB = True
else:
    optimizer = args.optimizer
    iter_generator = None
fmin_result = fmin(objective_function, cs, optimizer,
                   n_iterations=args.n_iters,
                   random_state=args.run_id,
                   multi_fidelity_iter_generator=iter_generator)
print(fmin_result)
res = b.get_results()
with open(os.path.join(output_path, 'run_%d.json' % args.run_id), 'w') as fh:
    json.dump(res, fh)
if HB:
    # give asynchronous workers time to shut down — TODO confirm necessity
    time.sleep(5)
print(dataset_id) data = pd.read_csv(f'processed_data/d{dataset_id}_processed.csv') HDL = get_HDL() evaluator = UltraoptEvaluator(data, 'balanced_accuracy') CS = hdl2cs(HDL) repetitions = int(sys.argv[2]) max_iter = int(sys.argv[3]) n_startup_trials = int(sys.argv[4]) res = pd.DataFrame(columns=[f"trial-{i}" for i in range(repetitions)], index=range(max_iter)) for trial in range(repetitions): ret = fmin( evaluator, HDL, "Random", random_state=trial * 10, n_iterations=200, ) losses = ret["budget2obvs"][1]["losses"] res[f"trial-{trial}"] = losses res = raw2min(res) m = res.mean(1) s = res.std(1) final_result = { "global_min": evaluator.global_min, "mean": m.tolist(), "std": s.tolist(), "q10": res.quantile(0.10, 1).tolist(), "q25": res.quantile(0.25, 1).tolist(), "q75": res.quantile(0.75, 1).tolist(),