def test_minimizer_api_dummy_minimize(verbose, call):
    """Check the result API of ``dummy_minimize`` for one verbose/callback pair.

    ``dummy_minimize`` is special as it does not support all parameters of
    the other minimizers and does not fit any models.
    """
    budget = 7
    outcome = dummy_minimize(
        branin,
        [(-5.0, 10.0), (0.0, 15.0)],
        n_calls=budget,
        random_state=1,
        verbose=verbose,
        callback=call,
    )

    # No surrogate model is fitted, so the model list stays empty.
    assert outcome.models == []
    check_minimizer_api(outcome, budget)
    check_minimizer_bounds(outcome, budget)

    # An objective that hands back its input unchanged must be rejected.
    with pytest.raises(ValueError):
        dummy_minimize(lambda x: x, [[-5, 10]])
def test_minimizer_api():
    """Yield API and bounds checks for ``dummy_minimize`` and all ``MINIMIZERS``.

    Generator-style test: every ``yield`` hands a ``(check, *args)`` tuple to
    the test runner. ``dummy_minimize`` is special as it does not support all
    parameters and does not fit any models, so it is handled on its own
    before the model-based minimizers.
    """
    def call_single(res):
        return res.x

    call_list = [call_single, check_result_callable]
    combos = product([True, False], [call_single, call_list])

    for verbose, call in combos:
        result = dummy_minimize(
            branin,
            [(-5.0, 10.0), (0.0, 15.0)],
            n_calls=7,
            random_state=1,
            verbose=verbose,
            callback=call,
        )
        assert result.models is None

        yield (check_minimizer_api, result)
        yield (check_minimizer_bounds, result)
        assert_raise_message(
            ValueError, "return a scalar", dummy_minimize, lambda x: x, [[-5, 10]]
        )

        n_calls = 7
        n_random_starts = 3
        # One model is expected per call made after the random starts.
        n_models = n_calls - n_random_starts

        for minimizer in MINIMIZERS:
            result = minimizer(
                branin,
                [(-5.0, 10.0), (0.0, 15.0)],
                n_random_starts=n_random_starts,
                n_calls=n_calls,
                random_state=1,
                verbose=verbose,
                callback=call,
            )
            yield (check_minimizer_api, result, n_models)
            yield (check_minimizer_bounds, result)
            assert_raise_message(
                ValueError, "return a scalar", minimizer, lambda x: x, [[-5, 10]]
            )
def test_dummy_categorical_integer():
    """A dimension given as the one-element list ``[1]`` must sample that value."""
    def constant_objective(params):
        return 0

    dims = [[1]]
    outcome = dummy_minimize(constant_objective, dims, n_calls=1, random_state=1)

    first_point = outcome.x_iters[0]
    only_choice = dims[0][0]
    assert first_point[0] == only_choice
def _gen_trial_vals(dims, num_trials, random_seed):
    """Return the points ``skopt.dummy_minimize`` samples from ``dims``.

    The objective is a constant ``0``; only the evaluated points
    (``x_iters``) of the ``num_trials`` calls are of interest.
    """
    import skopt

    def _zero(*_args):
        return 0

    sampled = skopt.dummy_minimize(
        _zero,
        dims,
        n_calls=num_trials,
        random_state=random_seed,
    )
    return sampled.x_iters
def __optimize_n_clusters__(self):
    """Search for the cluster count that minimizes ``self.__cluster_metric__``.

    The strategy is chosen by ``self.method``:

    * ``"exhaustive"`` (also used whenever the search range is narrower than
      30): evaluate every ``k`` in ``range(cluster_min, cluster_max)``.
    * ``"gprocess"``: ``gp_minimize`` over ``(cluster_min, cluster_max)``.
    * ``"dtree"``: ``forest_minimize`` with ``base_estimator='RF'``.
    * ``"dummy"``: ``dummy_minimize`` (random search).

    The metric is always called with a one-element list ``[k]``, which is
    how the skopt minimizers call their objective.

    Returns
    -------
    tuple
        ``(best_score, best_k, extra)``. For the exhaustive search ``extra``
        is ``cluster_max - cluster_min``; for the skopt-based searches it is
        ``res.x_iters``.

    Raises
    ------
    ValueError
        If ``self.method`` is not one of the supported names and the range
        is too wide for the exhaustive fallback.
    """
    span = self.cluster_max - self.cluster_min
    ntests = int(0.5 * span)
    bounds = [(self.cluster_min, self.cluster_max)]

    if self.method == "exhaustive" or span < 30:
        # No-optimize full test. Seed with the first candidate, passed as a
        # list like every other call, then scan the remaining ones once.
        best = self.cluster_min
        result = self.__cluster_metric__([best])
        for k_val in range(self.cluster_min + 1, self.cluster_max):
            run_result = self.__cluster_metric__([k_val])
            if run_result < result:
                result = run_result
                best = k_val
        return result, best, span

    if self.method == "gprocess":
        # Gaussian-process based optimization.
        res = gp_minimize(self.__cluster_metric__, bounds, n_calls=ntests)
    elif self.method == "dtree":
        # Random-forest based optimization.
        res = forest_minimize(self.__cluster_metric__, bounds,
                              base_estimator='RF', n_calls=ntests)
    elif self.method == "dummy":
        # Random search.
        res = dummy_minimize(self.__cluster_metric__, bounds, n_calls=ntests)
    else:
        raise ValueError(
            "unknown optimization method {!r}; expected 'exhaustive', "
            "'gprocess', 'dtree' or 'dummy'".format(self.method))

    # res.fun is the best score; res.func_vals holds all tested scores.
    return res.fun, res.x[0], res.x_iters
def main(argv):
    """Run the hyper-parameter search described by ``FLAGS.yaml``.

    Unless ``FLAGS.force`` is set, ``check_commit()`` runs first. The parsed
    YAML supplies the job templates, the parameters with their search
    spaces, and the optimizer settings. Every evaluation is reported to a
    ``LogCallback`` writing to ``HISTORY_SHEET_NAME`` inside ``FLAGS.log``.

    Raises
    ------
    ValueError
        If the optimizer type is neither ``"bayesian"`` nor ``"random"``.
    """
    if not FLAGS.force:
        check_commit()

    os.makedirs(FLAGS.log, exist_ok=True)
    path_history = os.path.join(FLAGS.log, HISTORY_SHEET_NAME)

    (name_templ, train_templ, eval_templ,
     params, spaces, preprocess, optimizer) = parse_yaml(FLAGS.yaml)

    if FLAGS.verbose:
        for param, space in zip(params, spaces):
            print("{}: {}".format(param, space))

    job = Job(
        name_templ,
        train_templ,
        eval_templ,
        params,
        preprocess,
        upload=not FLAGS['dry-run'].value,
        verbose=FLAGS.verbose,
        debug=FLAGS.debug,
    )
    callbacks = [LogCallback(params=params, path_out=path_history)]

    # The minimizers only minimize, so a maximization target is negated.
    if optimizer["maximize"]:
        def func(x):
            return -1 * job(x)
    else:
        func = job

    if optimizer["type"] == "bayesian":
        minimize = gp_minimize
    elif optimizer["type"] == "random":
        minimize = dummy_minimize
    else:
        raise ValueError("minimizer {} is invalid".format(optimizer["type"]))

    res = minimize(func, spaces, callback=callbacks, **optimizer["config"])
def test_minimizer_api():
    """Generator test covering ``dummy_minimize`` and every entry of ``MINIMIZERS``.

    ``dummy_minimize`` is special as it does not support all parameters and
    does not fit any models; the model-based minimizers are additionally
    checked for the number of fitted models.
    """
    def call_single(res):
        return res.x

    call_list = [call_single, check_result_callable]

    for verbose in [True, False]:
        for call in [call_single, call_list]:
            # Random search: no models expected.
            result = dummy_minimize(branin, [(-5.0, 10.0), (0.0, 15.0)],
                                    n_calls=7, random_state=1,
                                    verbose=verbose, callback=call)
            assert result.models is None
            yield (check_minimizer_api, result)
            yield (check_minimizer_bounds, result)
            assert_raise_message(ValueError, "return a scalar",
                                 dummy_minimize, lambda x: x, [[-5, 10]])

            # Model-based minimizers: models are counted after the random starts.
            n_calls = 7
            n_random_starts = 3
            n_models = n_calls - n_random_starts
            for minimizer in MINIMIZERS:
                result = minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)],
                                   n_random_starts=n_random_starts,
                                   n_calls=n_calls, random_state=1,
                                   verbose=verbose, callback=call)
                yield (check_minimizer_api, result, n_models)
                yield (check_minimizer_bounds, result)
                assert_raise_message(ValueError, "return a scalar",
                                     minimizer, lambda x: x, [[-5, 10]])
def _random_trial(state):
    """Sample one random point from ``state.flag_dims`` and return its flags.

    ``state.random_state`` is replaced by the random state carried on the
    skopt result.
    """
    import skopt

    def _constant(*_args):
        return 0

    sample = skopt.dummy_minimize(
        _constant,
        state.flag_dims,
        n_calls=1,
        random_state=state.random_state,
    )
    state.random_state = sample.random_state
    latest_point = sample.x_iters[-1]
    return trial_flags(state.flag_names, latest_point)
def _random_trial(state):
    """Sample one random point from ``state.dims`` and return the next trial flags.

    The skopt result is passed to ``state.update`` before the flags are read
    from ``state.next_trial_flags()``.
    """
    import skopt

    def _constant(*_args):
        return 0

    outcome = skopt.dummy_minimize(
        _constant,
        state.dims,
        n_calls=1,
        random_state=state.random_state,
    )
    state.update(outcome)
    return state.next_trial_flags()
def run(args):
    """Run a hyper-parameter search and post-process its results.

    Parameters
    ----------
    args
        Parsed command-line namespace. The attributes read here are
        ``serialization_dir``, ``mode`` (``'gp'``, ``'random'`` or
        ``'tree'``), ``n_calls``, ``n_random_starts``, ``random_seed``,
        ``xi``, ``kappa``, ``no_delete_worse`` and ``evaluate_on_test``.

    Raises
    ------
    ValueError
        If ``args.mode`` is not a supported search mode. Previously this
        surfaced only after ``setup`` as an unbound ``results`` variable.
    """
    if args.mode not in ('gp', 'random', 'tree'):
        raise ValueError(
            "unknown mode {!r}; expected 'gp', 'random' or 'tree'".format(args.mode))

    # Create base serialization dir; exist_ok avoids a check-then-create race.
    os.makedirs(args.serialization_dir, exist_ok=True)

    # Read in search configuration and create the blackbox function to optimize
    f, dimensions, x0, trial_paths, delete_worse_files_cb = setup(args)

    # Without an initial point at least one random start is forced.
    n_random_starts = max(1, args.n_random_starts) if x0 is None else args.n_random_starts
    callback = None if args.no_delete_worse else delete_worse_files_cb

    # Keyword arguments shared by all three minimizers.
    common = dict(
        x0=x0,
        n_calls=args.n_calls,
        random_state=args.random_seed,
        verbose=True,
        callback=callback,
    )

    # Run the actual optimization
    if args.mode == 'gp':
        results = skopt.gp_minimize(
            f, dimensions,
            n_random_starts=n_random_starts,
            acq_optimizer='sampling',
            xi=args.xi,
            kappa=args.kappa,
            **common
        )
    elif args.mode == 'random':
        results = skopt.dummy_minimize(f, dimensions, **common)
    else:  # 'tree'
        results = skopt.forest_minimize(
            f, dimensions,
            n_random_starts=n_random_starts,
            xi=args.xi,
            kappa=args.kappa,
            **common
        )

    # Maybe evaluate the best model on the test dataset
    if args.evaluate_on_test:
        logger.info('EVALUATE ON TEST')
        evaluate_on_test(args, results, trial_paths)

    # Save a bunch of visualizations of the search process
    logger.info('PLOTTING RESULTS')
    plot_results(args.serialization_dir, results)
    logger.info('ALL DONE')
def test_api():
    """Check result shapes and bounds of ``dummy_minimize`` (``maxiter`` API)."""
    lower = [-5, 0]
    upper = [10, 15]

    res = dummy_minimize(
        branin,
        [(-5.0, 10.0), (0.0, 15.0)],
        random_state=0,
        maxiter=100,
    )

    # One 2-d optimum, 100 evaluated points, 100 objective values.
    assert_array_equal(res.x.shape, (2,))
    assert_array_equal(res.x_iters.shape, (100, 2))
    assert_array_equal(res.func_vals.shape, (100,))

    # Every evaluated point lies strictly inside the box.
    assert_array_less(res.x_iters, np.tile(upper, (100, 1)))
    assert_array_less(np.tile(lower, (100, 1)), res.x_iters)

    assert_raises(ValueError, dummy_minimize, lambda x: x, [[-5, 10]])
def _trials_for_dims(names, dims, initial_x, num_trials, random_seed):
    """Build ``num_trials`` trial dicts by randomly sampling ``dims``.

    Points come from ``skopt.dummy_minimize`` with a constant objective.
    When at least one point was sampled, ``_apply_initial_x`` is called with
    ``initial_x`` and the first point. Each point is then converted with
    ``_native_python_xs`` and zipped with ``names``.
    """
    def _zero(*_args):
        return 0

    sampled = skopt.dummy_minimize(
        _zero, dims, n_calls=num_trials, random_state=random_seed
    )
    points = sampled.x_iters
    if points:
        _apply_initial_x(initial_x, points[0])

    trials = []
    for point in points:
        trials.append(dict(zip(names, _native_python_xs(point))))
    return trials
def run(self):
    """Random-search ``self.optimization_function`` and store the optimum.

    Each of the ``self._action_space`` dimensions is bounded by ``(0, 1)``.
    The best point and its objective value are written to ``self.results``
    under ``'optimal_decision'`` and ``'objective_value'``.
    """
    unit_bounds = (0, 1)
    search_space = [unit_bounds] * self._action_space

    outcome = dummy_minimize(
        self.optimization_function,
        search_space,
        random_state=1,
        n_calls=1000,
        verbose=True,
    )

    self.results['optimal_decision'] = outcome.x
    self.results['objective_value'] = outcome.fun
def test_api():
    """Check result shapes and bounds of ``dummy_minimize`` (``maxiter`` API)."""
    n_iter = 100
    bounds = [[-5, 10], [0, 15]]

    res = dummy_minimize(branin, bounds, random_state=0, maxiter=100)

    # Shapes of the optimum, the evaluated points and the objective values.
    assert_array_equal(res.x.shape, (2, ))
    assert_array_equal(res.x_iters.shape, (n_iter, 2))
    assert_array_equal(res.func_vals.shape, (n_iter, ))

    # All points must lie strictly between the lower and upper bounds.
    upper = np.tile([10, 15], (n_iter, 1))
    lower = np.tile([-5, 0], (n_iter, 1))
    assert_array_less(res.x_iters, upper)
    assert_array_less(lower, res.x_iters)

    assert_raises(ValueError, dummy_minimize, lambda x: x, [[-5, 10]])
def test_minimizer_api_dummy_minimize(verbose, call):
    """Check the ``dummy_minimize`` result API for one verbose/callback pair.

    ``dummy_minimize`` is special as it does not support all parameters and
    does not fit any models.
    """
    n_calls = 7
    bounds = [(-5.0, 10.0), (0.0, 15.0)]
    result = dummy_minimize(branin, bounds, n_calls=n_calls, random_state=1,
                            verbose=verbose, callback=call)

    # No models are fitted by random search.
    assert result.models == []
    for check in (check_minimizer_api, check_minimizer_bounds):
        check(result, n_calls)

    # A non-scalar objective must be rejected with a helpful message.
    assert_raise_message(ValueError, "return a scalar",
                         dummy_minimize, lambda x: x, [[-5, 10]])
def dummy(configurations, black_box_function, logger, verbose=False):
    """Random-search the thread count and return the best point found.

    The search space is one integer dimension from 1 to
    ``configurations["thread_limit"]``; the number of evaluations is
    ``configurations["random"]["num_of_exp"]``. ``logger`` is accepted but
    not used here.
    """
    thread_dimension = Integer(1, configurations["thread_limit"])
    search_space = [thread_dimension]
    n_experiments = configurations["random"]["num_of_exp"]

    outcome = dummy_minimize(
        func=black_box_function,
        dimensions=search_space,
        n_calls=n_experiments,
        random_state=None,
        x0=None,
        y0=None,
        verbose=verbose,
    )
    return outcome.x
def getBaseline(self):
    """Run (or resume) ``self.niter`` random-search baselines.

    For baseline ``i`` the results file is
    ``<dir>/experiments/<id>/baseline_<i+1>.txt``:

    * If the file exists, previously recorded points are loaded with
      ``self.load_baseline``. When fewer than ``self.ncalls`` points were
      recorded, the search resumes from them and appends to the file;
      otherwise the loaded data is wrapped in ``ResultData`` and
      ``self.ignorePoint`` is set.
    * If the file does not exist, a fresh search of ``self.ncalls`` calls is
      run and written to a new file.

    Each result is appended to ``self.baseline``. The results file is closed
    even when the optimizer raises.
    """
    self.faketime = 0
    for i in range(self.niter):
        filename = (self.dir + '/experiments/' + str(self.id)
                    + '/baseline_' + str(i + 1) + '.txt')
        try:
            if os.path.isfile(filename):
                x_iters, y_vals, time_vals = self.load_baseline(self.id, i)
                remaining = self.ncalls - len(y_vals)
                if remaining > 0:
                    # Resume: append new points after the recorded ones.
                    self.savefile = open(filename, mode='a', newline='')
                    self.startime = time()
                    self.faketime = time_vals[-1]
                    result = dummy_minimize(
                        self.obj, self.searchSpace, verbose=True,
                        n_calls=remaining, x0=x_iters, y0=y_vals,
                        callback=[self.savePoint, self.earlyStopping])
                else:
                    # Baseline already complete; reuse the stored data.
                    result = ResultData(x_iters, y_vals, time_vals)
                    self.ignorePoint = True
            else:
                self.savefile = open(filename, mode='w', newline='')
                self.startime = time()
                result = dummy_minimize(
                    self.obj, self.searchSpace, verbose=True,
                    n_calls=self.ncalls, callback=[self.savePoint])
            self.baseline.append(result)
        finally:
            # Closing an already closed file is harmless; the attribute may
            # be missing if no file has been opened so far.
            savefile = getattr(self, 'savefile', None)
            if savefile is not None:
                savefile.close()
def main():
    """Run one random-search call on ``stybtang`` and dump the result.

    The result is written to ``hyperband_stybtang.pkl`` inside the directory
    given by ``--results_dir``.
    """
    cli = argparse.ArgumentParser(description='Setup experiment.')
    cli.add_argument('--results_dir', type=str,
                     help='Path to results directory.')
    args = cli.parse_args()

    # Five dimensions, each bounded by (-5, 5).
    bounds = np.tile((-5., 5.), (5, 1))
    results = dummy_minimize(
        stybtang,
        bounds,
        verbose=True,
        n_calls=1,
        random_state=0,
    )

    output_file = os.path.join(args.results_dir, 'hyperband_stybtang.pkl')
    dump(results, output_file)
def test_minimizer_api_dummy_minimize(verbose, call):
    """Check the ``dummy_minimize`` result API for one verbose/callback pair.

    ``dummy_minimize`` is special as it does not support all parameters and
    does not fit any models.
    """
    n_calls = 7
    search_space = [(-5.0, 10.0), (0.0, 15.0)]
    result = dummy_minimize(
        branin,
        search_space,
        n_calls=n_calls,
        random_state=1,
        verbose=verbose,
        callback=call,
    )

    # Random search fits no models at all.
    assert result.models is None
    check_minimizer_api(result, n_calls)
    check_minimizer_bounds(result, n_calls)

    # A non-scalar objective must be rejected with a helpful message.
    assert_raise_message(
        ValueError, "return a scalar", dummy_minimize, lambda x: x, [[-5, 10]]
    )
def test_checkpoint_saver():
    """``CheckpointSaver`` must write a loadable checkpoint with the final ``x``.

    The checkpoint file is removed afterwards even when an assertion fails,
    so a failing run does not leave a stale file behind.
    """
    checkpoint_path = "./test_checkpoint.pkl"

    # Start from a clean slate in case an earlier run left a file behind.
    if os.path.isfile(checkpoint_path):
        os.remove(checkpoint_path)

    try:
        checkpoint_saver = CheckpointSaver(checkpoint_path, compress=9)
        result = dummy_minimize(bench1, [(-1.0, 1.0)],
                                callback=checkpoint_saver, n_calls=10)

        assert os.path.exists(checkpoint_path)
        assert load(checkpoint_path).x == result.x
    finally:
        if os.path.isfile(checkpoint_path):
            os.remove(checkpoint_path)
def test_checkpoint_saver():
    """Check that ``CheckpointSaver`` persists the optimization result.

    After ten calls the checkpoint must exist and its stored ``x`` must
    equal the ``x`` of the returned result. Cleanup runs in ``finally`` so
    the file is removed even if one of the assertions fails.
    """
    checkpoint_path = "./test_checkpoint.pkl"

    def _cleanup():
        if os.path.isfile(checkpoint_path):
            os.remove(checkpoint_path)

    _cleanup()
    try:
        saver = CheckpointSaver(checkpoint_path, compress=9)
        result = dummy_minimize(bench1, [(-1.0, 1.0)],
                                callback=saver, n_calls=10)
        assert os.path.exists(checkpoint_path)
        assert load(checkpoint_path).x == result.x
    finally:
        _cleanup()
def random_forest_on(rmspe_rf, mape_rf):
    """Tune, fit and score a random forest; append the errors to the lists.

    Hyper-parameters (max depth, min samples split, min samples leaf) are
    chosen by random search minimizing the cross-validated mean squared
    error on the training data. The refitted model is scored on
    ``y_test[dic.get(i)]``; RMSPE and MAPE go to the two argument lists,
    RMSE and MAE to the module-level ``squared_rf`` and ``absolut_rf``.

    NOTE(review): relies on names from the enclosing scope (``X_train``,
    ``y_train``, ``X_test``, ``y_test``, ``cv_inner``, ``space_xgb``,
    ``n_calls_hyp``, ``dic``, ``i``, ``squared_rf``, ``absolut_rf``).

    Returns
    -------
    tuple
        ``(rmspe_rf, mape_rf)``, the same list objects that were passed in.
    """
    def treina_random_forest(params):
        candidate = RandomForestRegressor(
            n_estimators=1500,
            max_depth=params[0],
            min_samples_split=params[1],
            min_samples_leaf=params[2],
        )
        scores = cross_val_score(candidate, X_train, y_train['point_estimate'],
                                 cv=cv_inner,
                                 scoring="neg_mean_squared_error")
        # Scores are negated MSE, so flip the sign to get a loss to minimize.
        return -np.mean(scores)

    resultado_random_forest = dummy_minimize(
        treina_random_forest, space_xgb, n_calls=n_calls_hyp, verbose=1)
    best = resultado_random_forest.x

    random_reg = RandomForestRegressor(
        n_estimators=1500,
        max_depth=best[0],
        min_samples_split=best[1],
        min_samples_leaf=best[2],
    )
    fitted = random_reg.fit(X_train, y_train['point_estimate'])
    rand_pred = fitted.predict(X_test)

    actual = y_test[dic.get(i)]
    relative_error = (actual - rand_pred) / actual

    rmspe_rf.append((np.mean(relative_error**2))**0.5)
    mape_rf.append(np.mean(np.abs(actual - rand_pred) / actual))
    squared_rf.append(mean_squared_error(actual, rand_pred)**0.5)
    absolut_rf.append(mean_absolute_error(actual, rand_pred))

    return (rmspe_rf, mape_rf)
def check_minimize(func, y_opt, dimensions, margin, n_calls):
    """Assert that random search gets within ``margin`` of the optimum ``y_opt``."""
    outcome = dummy_minimize(func, dimensions, n_calls=n_calls, random_state=1)
    tolerated = y_opt + margin
    assert_less(outcome.fun, tolerated)
def test_timer_callback():
    """``TimerCallback`` must record one positive-sum timing per call."""
    n_calls = 10
    timer = TimerCallback()
    dummy_minimize(bench1, [(-1.0, 1.0)], callback=timer, n_calls=10)

    recorded = timer.iter_time
    assert_equal(len(recorded), n_calls)
    assert_less(0.0, sum(recorded))
def check_minimize(func, y_opt, dimensions, margin, n_calls):
    """Run random search on ``func`` and check the best value found.

    The best objective value must be strictly below ``y_opt + margin``.
    """
    best_value = dummy_minimize(
        func,
        dimensions,
        n_calls=n_calls,
        random_state=1,
    ).fun
    assert_less(best_value, y_opt + margin)
def tune(self, params, evals=10, init_config=None, seed=None):
    """Run the hyper-parameter search, then retrain and evaluate on the test set.

    The search uses a Gaussian Process surrogate (``self.method ==
    'bayesian'``) or random search otherwise. It resumes from
    ``checkpoint.pkl`` in ``self.logsdir`` when that file exists. The run
    time and best score are appended to ``results.txt``; the test-set
    evaluation is written to ``result_test.txt``. Start and finish messages
    are sent through ``telegram-send``.

    Parameters
    ----------
    params: list
        List of skopt.space.space.Dimensions to be searched. The list is
        copied, so the caller's list is not modified when model-specific
        dimensions are added.

    evals: int
        Number of evaluations to perform.

    init_config: list, default None
        An initial parameter configuration for seeding the Gaussian Process.
        NOTE(review): currently not used by this method.

    seed: int, default None
        Seed for random_state of `gp_minimize` or `dummy_minimize`. Set to a
        fixed integer for reproducibility. Falls back to ``self.seed`` when
        a search is run.
    """
    msg = 'Started ' + self.recommender_class.RECOMMENDER_NAME + ' ' + self.dataset_name
    subprocess.run(['telegram-send', msg])

    # Work on a copy: the appends below would otherwise accumulate in the
    # caller's list across repeated calls.
    params = list(params)

    # Shape of the URM_test CSR matrix.
    U, I = self.URM_test.shape

    # Upper bound shared by all size-like dimensions added below.
    size_upper = int(I * 0.75) if I <= 1024 else 1024

    if self.recommender_class == GANMF:
        params.append(Integer(4, size_upper, name='emb_dim', dtype=int))
        self.fit_param_names.append('emb_dim')

    if self.recommender_class == CFGAN or self.recommender_class == DeepGANMF:
        params.append(Integer(4, size_upper, name='d_nodes', dtype=int))
        params.append(Integer(4, size_upper, name='g_nodes', dtype=int))
        self.fit_param_names.append('d_nodes')
        self.fit_param_names.append('g_nodes')

    if self.recommender_class == DisGANMF:
        params.append(Integer(4, size_upper, name='d_nodes', dtype=int))
        self.fit_param_names.append('d_nodes')

    self.dimension_names = [p.name for p in params]

    # Make sure the max. value of `num_factors` does not exceed min(U, I).
    try:
        idx = self.dimension_names.index('num_factors')
        maxval = params[idx].bounds[1]
        if maxval > min(U, I):
            params[idx] = Integer(1, min(U, I), name='num_factors', dtype=int)
    except ValueError:
        # No `num_factors` dimension in this search space.
        pass

    if len(params) > 0:
        # Check if there is already a checkpoint for this experiment.
        checkpoint_path = os.path.join(self.logsdir, 'checkpoint.pkl')
        checkpoint_exists = os.path.exists(checkpoint_path)
        checkpoint_saver = CheckpointSaver(checkpoint_path, compress=3)

        if seed is None:
            seed = self.seed

        minimize = gp_minimize if self.method == 'bayesian' else dummy_minimize
        t_start = int(time.time())

        if checkpoint_exists:
            # Resume: replay the stored evaluations and only run the rest.
            previous_run = skopt.load(checkpoint_path)
            resume_kwargs = dict(
                n_calls=evals - len(previous_run.func_vals),
                x0=previous_run.x_iters,
                y0=previous_run.func_vals,
                random_state=seed,
                verbose=True,
                callback=[checkpoint_saver],
            )
            if self.method == 'bayesian':
                # All initial points come from the checkpoint.
                resume_kwargs['n_random_starts'] = 0
            results = minimize(self.obj_func, params, **resume_kwargs)
        else:
            # Hyper-parameter optimization from scratch.
            results = minimize(self.obj_func, params, n_calls=evals,
                               random_state=seed, verbose=True,
                               callback=[checkpoint_saver])

        t_end = int(time.time())

        # Best parameters of this experiment.
        best_params = self.load_best_params()

        with open(os.path.join(self.logsdir, 'results.txt'), 'a') as f:
            f.write('Experiment ran for {}\n'.format(str(datetime.timedelta(seconds=t_end - t_start))))
            f.write('Best {} score: {}. Best result found at: {}\n'.format(self.metric, results.fun, best_params))

        if self.recommender_class in [IALSRecommender, MatrixFactorization_BPR_Cython]:
            self.dimension_names.append('epochs')
        self.build_fit_params(best_params.values())

    # Retrain with all training data
    set_seed(seed)
    if self.isGAN:
        model = self.recommender_class(self.URM_train, mode=train_mode, is_experiment=True)
        model.logsdir = self.logsdir
        model.fit(**self.fit_params)
    else:
        model = self.recommender_class(self.URM_train)
        model.fit(**self.fit_params)

    _, results_run_string = self.evaluatorTest.evaluateRecommender(model)

    print('\n\nResults on test set:')
    print(results_run_string)
    print('\n\n')

    with open(os.path.join(self.logsdir, 'result_test.txt'), 'w') as f:
        f.write(results_run_string)

    msg = 'Finished ' + self.recommender_class.RECOMMENDER_NAME + ' ' + self.dataset_name
    subprocess.run(['telegram-send', msg])
def compare_optimizers(num_instances=4, graph_size=15, n_calls=8, n_random_starts=2):
    """Compare four skopt minimizers on random ``Graph`` instances.

    For every instance each minimizer (random sampling, decision trees,
    gradient boosted trees, Bayesian optimization) is run in turn and the
    fraction of the optimal score it reached is recorded. Mean and standard
    deviation of these fractions are printed per minimizer.
    """
    global pbar
    pbar = None
    # For progress bar.
    pbar = tqdm(total=num_instances*n_calls*4)

    instances = [Graph(graph_size) for _ in range(num_instances)]

    # Percent of optimal score achieved by each algorithm.
    dummy = []
    decision_trees = []
    gradient_boosted_trees = []
    baynesian = []

    # (minimizer, extra keyword arguments, score list), in execution order.
    # Random sampling takes no ``n_random_starts``.
    model_kwargs = {"n_random_starts": n_random_starts}
    contenders = (
        (dummy_minimize, {}, dummy),
        (forest_minimize, model_kwargs, decision_trees),
        (gbrt_minimize, model_kwargs, gradient_boosted_trees),
        (gp_minimize, model_kwargs, baynesian),
    )

    # For each instance, run each algorithm.
    for inst in instances:
        # Scikit functions only take in parameters and want to minimize
        # values, so wrap get_expectation and negate it.
        def sk_get_exp(x):
            return -1*get_expectation(x, inst)

        opt = inst.optimal_score()[0]

        for minimize, extra, scores in contenders:
            inst.clear_runs()
            minimize(func=sk_get_exp,
                     dimensions=[(0, 2*np.pi), (0, np.pi)],
                     n_calls=n_calls,
                     **extra)
            scores.append(float(inst.currentScore) / opt)

    # Compare mean/stdev of % opt. achieved for each algorithm.
    print("-- % of Optimal Achieved, Mean and Std. Dev --")
    reports = (
        ("Random Sampling:\nMean: %s\nStd. Dev: %s", dummy),
        ("Decision Trees:\nMean: %s\nStd. Dev: %s", decision_trees),
        ("Gradient Boosted Decision Trees:\nMean: %s\nStd. Dev: %s", gradient_boosted_trees),
        ("Baynesian Optimization:\nMean: %s\nStd. Dev: %s", baynesian),
    )
    for template, scores in reports:
        print(template % (mean(scores), stdev(scores)))
def hyperdrive(objective, hyperparameters, results_path, model="GP", n_iterations=50,
               verbose=False, checkpoints_path=None, deadline=None, sampler=None,
               n_samples=None, random_state=0):
    """
    Distributed optimization - one optimization per node.

    Parameters
    ----------
    * `objective` [function]:
        User defined function which calls a learner
        and returns a metric of interest.

    * `hyperparameters` [list, shape=(n_hyperparameters,)]:
        Bounds of the hyperparameters; used to create one search space
        per MPI rank.

    * `results_path` [string]
        Path to save optimization results

    * `model` [string, default="GP"]
        Probabilistic learner used to model our objective function.
        Options:
        - "GP": Gaussian process
        - "RF": Random forest
        - "GBRT": Gradient boosted regression trees
        - "RAND": Random search

    * `n_iterations` [int, default=50]
        Number of optimization iterations

    * `verbose` [bool, default=False]
        Verbosity of optimization. Only honoured on rank 0.

    * `checkpoints_path` [string, default=None]
        Path to previously saved results. Used to resume optimization; when
        given, a checkpoint is also saved during the optimization.

    * `deadline` [int, optional]
        Deadline (seconds) for the optimization to finish within.

    * `sampler` [str, default=None]
        Random sampling scheme for optimizer's initial runs.
        Options:
        - "lhs": latin hypercube sampling

    * `n_samples` [int, default=None]
        Number of random samples to be drawn from the `sampler`.
        - Required if you would like to use `sampler`.
        - Must be <= the number of elements in the smallest
          hyperparameter bound's set.

    * `random_state` [int, default=0]
        Random state for reproducibility.

    Raises
    ------
    ValueError
        If both `checkpoints_path` and `sampler` are given, if `sampler` is
        given without `n_samples`, or if `model` is not supported.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    if checkpoints_path and sampler:
        raise ValueError('Cannot use both a restart from a previous run and '
                         'use latin hypercube sampling for initial search points!')

    # Setup savefile; zero-pad the rank so results sort by rank.
    filename = f'hyperspace{rank:02d}'
    savefile = os.path.join(results_path, filename)

    # Create hyperspaces, and either sampling bounds or checkpoints
    hyperspace = create_hyperspace(hyperparameters)
    space = hyperspace[rank]

    init_points = None
    init_response = None
    n_rand = 10

    # Latin hypercube sampling
    if sampler and not n_samples:
        raise ValueError(f'Sampler requires n_samples > 0. Got {n_samples}')
    elif sampler and n_samples:
        hyperbounds = create_hyperbounds(hyperparameters)
        bounds = hyperbounds[rank]
        # Get initial points in domain via latin hypercube sampling
        init_points = lhs_start(bounds, n_samples)
        # More than 10 initial points must not yield a negative count.
        n_rand = max(0, 10 - len(init_points))

    # Resuming from checkpoint
    if checkpoints_path:
        checkpoint = _load_checkpoint(checkpoints_path, rank)
        try:
            init_points = checkpoint.x_iters
            init_response = checkpoint.func_vals
            n_rand = max(0, 10 - len(init_points))
        except AttributeError:
            # Missing saves won't have initial values.
            init_points = None
            init_response = None
            n_rand = 10

    callbacks = []
    if deadline:
        deadline = DeadlineStopper(deadline)
        callbacks.append(deadline)

    if checkpoints_path:
        checkpoint_callback = CheckpointSaver(checkpoints_path, filename)
        callbacks.append(checkpoint_callback)

    # Arguments shared by every minimizer. Verbose mode should only run on
    # node 0; passing a falsy value equals omitting the argument.
    minimize_kwargs = dict(
        n_calls=n_iterations,
        verbose=verbose if rank == 0 else False,
        callback=callbacks,
        x0=init_points,
        y0=init_response,
        random_state=random_state,
    )

    if model == "GP":
        minimize = gp_minimize
    elif model == "RF":
        minimize = forest_minimize
    elif model == "GBRT":
        minimize = gbrt_minimize
    elif model == "RAND":
        minimize = dummy_minimize
    else:
        raise ValueError("Invalid model {}. Read the documentation for "
                         "supported models.".format(model))

    # Random search takes no random-start count.
    if model != "RAND":
        minimize_kwargs["n_random_starts"] = n_rand

    result = minimize(objective, space, **minimize_kwargs)

    # Each worker will independently write their results to disk
    dump(result, savefile)
#############################################################################
# The two dimensional partial dependence plot can look like the true
# objective but it does not have to. As points at which the objective function
# is being evaluated are concentrated around the suspected minimum, the
# surrogate model sometimes is not a good representation of the objective far
# away from the minimum.
#
# Random sampling
# ===============
#
# Compare this to a minimizer which picks points at random. There is no
# structure visible in the order in which it evaluates the objective. Because
# there is no model involved in the process of picking sample points at
# random, we cannot plot the partial dependence of the model.

dummy_res = dummy_minimize(branin, bounds, n_calls=n_calls, random_state=4)
_ = plot_evaluations(dummy_res, bins=10)

#############################################################################
# Working in six dimensions
# =========================
#
# Visualising what happens in two dimensions is easy. Where
# :class:`plots.plot_evaluations` and :class:`plots.plot_objective` start to
# be useful is when the number of dimensions grows. They take care of many of
# the more mundane things needed to make good plots of all combinations of
# the dimensions.
#
# The next example uses :class:`benchmarks.hart6`, which has six dimensions,
# and shows both :class:`plots.plot_evaluations` and
# :class:`plots.plot_objective`.
def test_timer_callback():
    """Check that ``TimerCallback`` collects a timing for each of 10 calls."""
    stopwatch = TimerCallback()
    dummy_minimize(
        bench1,
        [(-1.0, 1.0)],
        callback=stopwatch,
        n_calls=10,
    )

    # One entry per call, and the total elapsed time must be positive.
    assert_equal(len(stopwatch.iter_time), 10)
    total_time = sum(stopwatch.iter_time)
    assert_less(0.0, total_time)
def get_random_samples(_):
    """Run the initial random search; the positional argument is ignored.

    Returns the ``dummy_minimize`` result for ``score_config`` over ``space``
    with ``n_initial_random_samples`` calls, seeded with ``seed``.
    """
    random_search = dummy_minimize(
        score_config,
        space,
        n_calls=n_initial_random_samples,
        random_state=seed,
    )
    return random_search
tournament_fraction=0.2, mutation_rate=0.05): """ Use the GeneticOptimizer here """ optimizer = GeneticOptimizer(dimensions=dimensions, n_random_starts=n_random_starts, tournament_fraction=tournament_fraction, mutation_rate=mutation_rate) for i in range(n_calls): x = optimizer.ask() y = func(x) optimizer.tell(x, y) return create_result(optimizer.Xi, optimizer.yi, dimensions) if __name__ == "__main__": from skopt.benchmarks import branin from skopt import dummy_minimize space = [Real(-5, 10), Real(0, 15)] a = ga_minimize(branin, space, n_calls=64) b = dummy_minimize(branin, space, n_calls=64) c = rs_minimize(branin, space, n_calls=64) print(a.fun) print(b.fun) print(c.fun)
def area_limitada(new_df, space_xgb, get_bairro=0):
    """Train XGBoost inside a fixed geographic box and evaluate outside it.

    Rows whose latitude/longitude fall inside the box form the training set;
    all other rows form the test set. Hyper-parameters are chosen by random
    search (5 calls) minimizing the 2-fold cross-validated mean squared
    error.

    Parameters
    ----------
    new_df : pandas.DataFrame
        Input data. Must contain the ``latitude``/``longitude`` columns, the
        identifier and target columns dropped below, and ``bairro``.
    space_xgb : list
        skopt search space in the order learning_rate, n_estimators,
        max_depth, min_child_weight, gamma, subsample, colsample_bytree.
    get_bairro : int, default 0
        How the location variable ``bairro`` is handled:
        0 - use the location variable unchanged;
        1 - use it after blanking out 10% of the training values;
        2 - do not use the location variable.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``RMSPE``, ``MAPE``, ``MAE`` and ``RMSQ``.
    """
    from skopt import dummy_minimize
    from sklearn.model_selection import cross_val_score, KFold
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    import xgboost as xgb

    n_calls_hyp = 5

    X = new_df

    # Compute the bounding-box mask once and reuse it for both splits.
    inside_box = ((X['latitude'] > -23.5884) & (X['latitude'] < -23.5495) &
                  (X['longitude'] > -46.6817) & (X['longitude'] < -46.6379))
    train_limit = X[inside_box]
    test_limit = X[~inside_box]

    non_features = [
        'id', 'address', 'tower_name', 'point_estimate', 'maximum_estimate',
        'minimum_estimate', 'latitude', 'longitude'
    ]
    X_train = train_limit.drop(non_features, axis=1)
    X_test = test_limit.drop(non_features, axis=1)

    y_train = train_limit[['point_estimate', 'maximum_estimate']]
    y_test = test_limit[['point_estimate', 'maximum_estimate']]

    if get_bairro == 0:
        X_train, X_test = bairro(X_train, X_test, y_train, train_rf=False)
    elif get_bairro == 1:
        # Blank out a random 10% of the training locations. ``np.nan`` is
        # used because the ``np.NaN`` alias was removed in NumPy 2.0.
        n_blank = int(np.round(len(X_train) * 0.1))
        blanked = X_train['bairro'].sample(n_blank).index
        X_train.loc[blanked, 'bairro'] = np.nan
        X_train, X_test = bairro(X_train, X_test, y_train, train_rf=False)
    elif get_bairro == 2:
        X_train.drop('bairro', axis=1, inplace=True)
        X_test.drop('bairro', axis=1, inplace=True)

    y_train, y_test = y_train['point_estimate'], y_test['point_estimate']

    cv_inner = KFold(n_splits=2, shuffle=True)

    def build_model(params):
        """Create an XGBRegressor from a point of ``space_xgb``."""
        return xgb.XGBRegressor(learning_rate=params[0],
                                n_estimators=params[1],
                                max_depth=params[2],
                                min_child_weight=params[3],
                                gamma=params[4],
                                subsample=params[5],
                                colsample_bytree=params[6])

    def treina_xgb_2(params):
        # Scores are negated MSE, so flip the sign to get a loss to minimize.
        return -np.mean(
            cross_val_score(build_model(params), X_train, y_train,
                            cv=cv_inner, scoring="neg_mean_squared_error"))

    resultado_xgb = dummy_minimize(treina_xgb_2, space_xgb,
                                   n_calls=n_calls_hyp, verbose=1)

    xgbclf = build_model(resultado_xgb.x).fit(X_train, y_train)
    xgb_pred = xgbclf.predict(X_test)

    results_dic = {
        'RMSPE': [(np.mean(((y_test - xgb_pred) / y_test)**2))**0.5],
        'MAPE': [np.mean(np.abs(y_test - xgb_pred) / y_test)],
        'MAE': [mean_absolute_error(y_test, xgb_pred)],
        'RMSQ': [mean_squared_error(y_test, xgb_pred)**0.5]
    }

    return pd.DataFrame(results_dic)
def check_minimize(func, y_opt, dimensions, margin, maxiter):
    """Assert that random search gets within ``margin`` of the optimum ``y_opt``.

    Runs ``dummy_minimize`` for ``maxiter`` iterations with a fixed seed.
    """
    outcome = dummy_minimize(
        func,
        dimensions,
        maxiter=maxiter,
        random_state=1,
    )
    upper_limit = y_opt + margin
    assert_less(outcome.fun, upper_limit)
REND = 0.9 potencia = REND * (RHO / (2 * SNOZ**2)) * (Hg / (1 / (2 * G * SNOZ**2) + F * L / (D**5)))**(3 / 2) caudal = (Hg / (1 / (2 * G * SNOZ**2) + F * L / D**5))**(1 / 2) coste = (L + 50 * sum(solucion)) * D**2 if potencia < 8e3: coste = 1000000 if caudal > 35e-3: coste = 1000000 return coste x0 = crea_individuo() res = dummy_minimize(fitness_function_single, [(0, 1) for i in range(205)], x0=[x0]) fichero = open("individuos.txt", "w") fichero2 = open("fitness.txt", "w") for i in res.x_iters: fichero.write(str(i)) fichero.write(str("\n")) for j in res.func_vals: fichero2.write(str(j)) fichero2.write("\n") fichero.close() fichero2.close()
FLAGS, unparsed = parser.parse_known_args()

# Search space: learning rate and L2 factor are searched as exponents of 10
# (see the 10**x conversion when the best values are printed below).
boundsOpt = [
    Real(-8, -1, name='log_init_lr'),
    Real(-8, -1, name='log_l2_r'),
    Categorical([512, 256], name='batch_size'),
    Categorical(['relu', 'tanh', 'softsign'], name='activation'),
    Real(0, 0.8, 'uniform', name='dropout_rate'),
]

t1 = time.time()

# Persist the search state after every call so a run can be inspected later.
checkpoint_saver = CheckpointSaver(
    'C:/behrouz/projects/behrouz-Rui-Gaurav-project/excel-pbi-modeling/'
    'imbalanced_batch/randomSearch_checkpoint.pkl')

results = dummy_minimize(
    run_model,
    boundsOpt,
    n_calls=200,
    random_state=None,  # set it for reproducible results
    verbose=True,
    callback=[checkpoint_saver],
)

t2 = time.time()

print('-' * 100)
print('Best parameters Obtained:')
print('Minimum Validation Loss obtained=', results.fun)
print(
    'Learning Rate:', 10**results.x[0],
    'l2:', 10**results.x[1],
    'Batch size:', results.x[2],
    'activation= ', results.x[3],
    'Dropout= ', results.x[4],
)
print('-' * 100)
print('Total Time(min) for optimization= ', round((t2 - t1) / 60, 2))

from matplotlib import pyplot as plt

# Validation loss of every evaluated configuration, in evaluation order.
plt.plot(results.func_vals)
plt.ylabel('Validation Loss')
def get_plan(funcs, norm, goals=None, solver='gp_minimize', n_calls=25,
             random_state=None, n_initial_points=10, verbose=True):
    """Hyperparameter optimization for RayStation treatment planning.

    Optimizes the treatment plan with one of the following scikit-optimize
    solvers:

        - `gp_minimize`: Bayesian optimization using Gaussian processes.
        - `forest_minimize`: Sequential optimization using decision
           trees.
        - `dummy_minimize`: Random search by uniform sampling within the
           given bounds.

    Any other `solver` value falls back to `gp_minimize`. A checkpoint is
    saved next to the constituent function file after every call. For more
    details about scikit-optimize, refer to
    https://scikit-optimize.github.io/stable/index.html

    Parameters
    ----------
    funcs : str
        Path to CSV with constituent function specifications.
    norm : (str, float, float)
        Region of interest, dose, and volume used for normalization.
    goals : pandas.DataFrame or str, optional
        Path to CSV with clinical goal specifications.
        If None, goals are assigned based on constituent functions.
    solver : {'gp_minimize', 'forest_minimize', 'dummy_minimize'}, optional
        Name of scikit-optimize solver to use.
    n_calls : int, optional
        Number of calls to objective.
    random_state : int, optional
        Set random state for reproducible results.
    n_initial_points : int, optional
        Number of random function evaluations before function
        approximation. Not used by `dummy_minimize`.
    verbose : bool, optional
        Control the verbosity of the solver.

    Returns
    -------
    raybay.RaybayResult
        RayStation treatment plan results.

    """
    # Get RayStation objects
    patient = connect.get_current('Patient')
    case = connect.get_current('Case')
    plan = connect.get_current('Plan')
    beam_set = connect.get_current('BeamSet')

    # Initialize result object
    result = raybay.RaybayResult(patient.Name, case.CaseName, plan.Name,
                                 funcs, norm, goals, solver)

    # Path prefix: `funcs` without its last nine characters.
    base_path = funcs[:-9]

    def obj(pars):
        return objective(plan, beam_set, result, base_path, pars)

    checkpoint_saver = skopt.callbacks.CheckpointSaver(
        base_path + 'checkpoint.pkl', store_objective=False)

    # Optimize
    start_time = time()
    solver_kwargs = dict(
        dimensions=get_dims(result.func_df),
        n_calls=n_calls,
        random_state=random_state,
        verbose=verbose,
        callback=[checkpoint_saver],
    )
    if solver == 'dummy_minimize':
        result.opt_result = skopt.dummy_minimize(obj, **solver_kwargs)
    else:
        if solver == 'forest_minimize':
            minimize = skopt.forest_minimize
        else:
            minimize = skopt.gp_minimize
        result.opt_result = minimize(
            obj, n_initial_points=n_initial_points, **solver_kwargs)

    # Remove the local objective function to allow pickling.
    result.opt_result.specs['args']['func'] = 'local'
    result.time = time() - start_time

    # Get optimal dose-volume histogram
    set_pars(plan, result.func_df, result.opt_result.x)
    calc_plan(plan, beam_set, result.norm)
    result.dvh_dict = get_dvh(result.roi_list)
    return result