def resolve_early_stopping(early_stopping, max_iters, metric_name):
    """Turn an ``early_stopping`` spec into a Tune scheduler that maximizes a metric.

    Args:
        early_stopping: Either the name of a scheduler (``str``) or an
            existing ``TrialScheduler`` instance.
        max_iters: Forwarded as ``max_t`` to the HyperBand-style schedulers
            (async HyperBand, HyperBand, ASHA and HyperBandForBOHB).
        metric_name: Metric the returned scheduler is pointed at.

    Returns:
        A scheduler configured with ``metric_name`` and ``mode="max"``. When
        an instance is passed in, it is updated in place and returned.

    Raises:
        ValueError: If a string is given that does not select a scheduler.
        TypeError: If ``early_stopping`` is neither a string nor a
            ``TrialScheduler``.
    """
    if not isinstance(early_stopping, str):
        if not isinstance(early_stopping, TrialScheduler):
            raise TypeError("`early_stopping` must be a str, boolean, "
                            f"or tune scheduler. Got {type(early_stopping)}.")
        # Re-target the caller's scheduler rather than building a new one.
        early_stopping._metric = metric_name
        early_stopping._mode = "max"
        return early_stopping

    scheduler_name = early_stopping
    objective = {"metric": metric_name, "mode": "max"}
    if scheduler_name in TuneBaseSearchCV.defined_schedulers:
        # Schedulers without an iteration budget.
        if scheduler_name == "PopulationBasedTraining":
            return PopulationBasedTraining(**objective)
        if scheduler_name == "MedianStoppingRule":
            return MedianStoppingRule(**objective)
        # Schedulers bounded by ``max_iters``.
        if scheduler_name == "AsyncHyperBandScheduler":
            return AsyncHyperBandScheduler(max_t=max_iters, **objective)
        if scheduler_name == "HyperBandScheduler":
            return HyperBandScheduler(max_t=max_iters, **objective)
        if scheduler_name == "ASHAScheduler":
            return ASHAScheduler(max_t=max_iters, **objective)
        if scheduler_name == "HyperBandForBOHB":
            return HyperBandForBOHB(max_t=max_iters, **objective)

    raise ValueError(
        "{} is not a defined scheduler. "
        "Check the list of available schedulers.".format(scheduler_name))
def tunerTrain():
    """Run a BOHB search over ``train`` with Ray Tune and print the best config.

    Starts Ray, samples ten configurations from the search space below using
    ``TuneBOHB`` together with ``HyperBandForBOHB``, and prints the
    configuration that scored best on ``mean_loss``.
    """
    ray.init(_memory=4000000000, num_cpus=5)

    search_space = {
        'lr': tune.loguniform(1e-4, 9e-1),
        # minimum of 2, otherwise 1//2 = 0 activation maps
        'finalOutput': tune.uniform(2, 50),
        'stride1': tune.uniform(1, 4),
        'stride2': tune.uniform(1, 4),
        'batchSize': tune.uniform(2, 32),
        'finalChannel': tune.uniform(1, 50),
    }

    # NOTE(review): the metric is called "mean_loss" but is maximized
    # everywhere below -- confirm this is the intended direction.
    objective = {"metric": "mean_loss", "mode": "max"}

    # Earlier experiments used an ASHAScheduler on its own, and a
    # BayesOptSearch paired with ASHA; BOHB is the configuration in use.
    searcher = TuneBOHB(max_concurrent=4, **objective)
    bracket_scheduler = HyperBandForBOHB(**objective)

    analysis = tune.run(
        train,
        config=search_space,
        scheduler=bracket_scheduler,
        search_alg=searcher,
        num_samples=10,
    )

    best = analysis.get_best_config(**objective)
    print("Best config: ", best)
def testLargestBracketFirst(self):
    """Three added trials share one bracket; a fourth opens a second one."""
    scheduler = HyperBandForBOHB(max_t=3, reduction_factor=3)
    mock_runner = _MockTrialRunner(scheduler)

    # Add and launch three trials; they must all land in the same bracket.
    for _ in range(3):
        trial = Trial("__fake")
        scheduler.on_trial_add(mock_runner, trial)
        mock_runner._launch_trial(trial)
    self.assertEqual(scheduler.state()["num_brackets"], 1)

    # One more trial (added but not launched) bumps the bracket count.
    overflow_trial = Trial("__fake")
    scheduler.on_trial_add(mock_runner, overflow_trial)
    self.assertEqual(scheduler.state()["num_brackets"], 2)
def get_raytune_schedule(raytune_cfg):
    """Build the Ray Tune trial scheduler selected by ``raytune_cfg["sched"]``.

    Args:
        raytune_cfg: Mapping with a ``"sched"`` entry. When a scheduler is
            selected, ``"default_metric"`` and ``"default_mode"`` are read as
            well, plus a per-scheduler section (``"asha"``, ``"hyperband"``,
            ``"pbt"`` or ``"pb2"``). The BOHB scheduler takes its settings
            from the ``"hyperband"`` section.

    Returns:
        The configured scheduler, or ``None`` (after printing an info
        message) when ``"sched"`` matches none of the known names. Matching
        is case-sensitive: only ``"asha"`` and ``"hyperband"`` in lower case,
        and ``"bohb"``/``"pbt"``/``"pb2"`` in all-lower or all-upper case.
    """
    sched_name = raytune_cfg["sched"]

    def shared_kwargs():
        # Looked up lazily so a config without a scheduler does not need
        # the metric/mode entries.
        return {
            "metric": raytune_cfg["default_metric"],
            "mode": raytune_cfg["default_mode"],
            "time_attr": "training_iteration",
        }

    if sched_name == "asha":
        return AsyncHyperBandScheduler(
            **shared_kwargs(),
            max_t=raytune_cfg["asha"]["max_t"],
            grace_period=raytune_cfg["asha"]["grace_period"],
            reduction_factor=raytune_cfg["asha"]["reduction_factor"],
            brackets=raytune_cfg["asha"]["brackets"],
        )

    if sched_name == "hyperband":
        return HyperBandScheduler(
            **shared_kwargs(),
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
        )

    # requires pip install hpbandster ConfigSpace
    if sched_name in ("bohb", "BOHB"):
        return HyperBandForBOHB(
            **shared_kwargs(),
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
        )

    if sched_name in ("pbt", "PBT"):
        return PopulationBasedTraining(
            **shared_kwargs(),
            perturbation_interval=raytune_cfg["pbt"]["perturbation_interval"],
            hyperparam_mutations=raytune_cfg["pbt"]["hyperparam_mutations"],
            log_config=True,
        )

    # requires pip install GPy sklearn
    if sched_name in ("pb2", "PB2"):
        return PB2(
            **shared_kwargs(),
            perturbation_interval=raytune_cfg["pb2"]["perturbation_interval"],
            hyperparam_bounds=raytune_cfg["pb2"]["hyperparam_bounds"],
            log_config=True,
        )

    print("INFO: Not using any Ray Tune trial scheduler.")
    return None
def testCheckTrialInfoUpdateMin(self):
    """Check pause/continue decisions and result annotation in ``mode="min"``.

    Three trials are launched. The first two report at iteration 1 and must
    be paused; the third reports last and must be allowed to continue. After
    asking the scheduler for the next trial, the search algorithm must have
    been notified of exactly two pauses, and the last result dict must carry
    a ``hyperband_info`` entry whose ``budget`` is 1.
    """

    def result(score, ts):
        return {"episode_reward_mean": score, TRAINING_ITERATION: ts}

    sched = HyperBandForBOHB(max_t=3, reduction_factor=3, mode="min")
    runner = _MockTrialRunner(sched)
    # Mock the searcher so on_pause notifications can be counted.
    runner._search_alg = MagicMock()
    trials = [Trial("__fake") for _ in range(3)]
    for t in trials:
        runner.add_trial(t)
        runner._launch_trial(t)

    # zip() stops after two results, so only the first two trials report here.
    for trial, trial_result in zip(trials, [result(1, 1), result(2, 1)]):
        decision = sched.on_trial_result(runner, trial, trial_result)
        self.assertEqual(decision, TrialScheduler.PAUSE)
        runner._pause_trial(trial)

    spy_result = result(0, 1)
    decision = sched.on_trial_result(runner, trials[-1], spy_result)
    self.assertEqual(decision, TrialScheduler.CONTINUE)

    sched.choose_trial_to_run(runner)
    self.assertEqual(runner._search_alg.on_pause.call_count, 2)
    self.assertIn("hyperband_info", spy_result)
    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(spy_result["hyperband_info"]["budget"], 1)
def _test_xgboost(method='BlendSearch'):
    """Tune ``train_breast_cancer`` with the given search method and log the best trial.

    Returns immediately (without running anything) when ``ray`` cannot be
    imported. For ``method == 'BlendSearch'`` the search is run through
    ``flaml.tune``; for every other method it is run through ``ray.tune``
    with the matching search algorithm and a scheduler.

    Args:
        method: Name of the search method. Handled values are
            'BlendSearch', 'ASHA', 'BOHB', 'Optuna', 'CFO', 'Dragonfly',
            'SkOpt', 'Nevergrad', 'ZOOpt', 'Ax' and 'HyperOpt'.
            NOTE(review): any other value leaves ``algo`` unassigned, so the
            ``tune.run`` call below would fail with a NameError -- confirm
            whether an explicit error is wanted.
    """
    try:
        import ray
    except ImportError:
        # Ray is optional for this test; skip silently when it is missing.
        return
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        # You can mix constants with search space objects.
        # The upper bound passed to randint differs by one between the
        # two groups of methods.
        "max_depth": tune.randint(1, 8) if method in [
            "BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1)
    }
    max_iter = 10
    for num_samples in [256]:
        time_budget_s = 60 #None
        for n_cpu in [8]:
            start_time = time.time()
            ray.init(num_cpus=n_cpu, num_gpus=0)
            if method == 'BlendSearch':
                analysis = tune.run(
                    train_breast_cancer,
                    init_config={
                        "max_depth": 1,
                        "min_child_weight": 3,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    report_intermediate_result=True,
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir='logs/',
                    num_samples=num_samples*n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True)
            else:
                # Pick the search algorithm; imports are local so that only
                # the selected optimizer's package needs to be installed.
                if 'ASHA' == method:
                    algo = None
                elif 'BOHB' == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB
                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif 'Optuna' == method:
                    from ray.tune.suggest.optuna import OptunaSearch
                    algo = OptunaSearch()
                elif 'CFO' == method:
                    from flaml import CFO
                    algo = CFO(points_to_evaluate=[{
                        "max_depth": 1,
                        "min_child_weight": 3,
                    }], cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    })
                elif 'Dragonfly' == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch
                    algo = DragonflySearch()
                elif 'SkOpt' == method:
                    from ray.tune.suggest.skopt import SkOptSearch
                    algo = SkOptSearch()
                elif 'Nevergrad' == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng
                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif 'ZOOpt' == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch
                    algo = ZOOptSearch(budget=num_samples*n_cpu)
                elif 'Ax' == method:
                    from ray.tune.suggest.ax import AxSearch
                    algo = AxSearch()
                elif 'HyperOpt' == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch
                    algo = HyperOptSearch()
                    # NOTE(review): this value is replaced by the ASHA
                    # scheduler in the block right below -- confirm intent.
                    scheduler = None
                # Every method except BOHB (which built its own scheduler
                # above) runs under ASHA.
                if method != 'BOHB':
                    from ray.tune.schedulers import ASHAScheduler
                    scheduler = ASHAScheduler(
                        max_t=max_iter,
                        grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir='logs/',
                    num_samples=num_samples*n_cpu,
                    time_budget_s=time_budget_s,
                    scheduler=scheduler,
                    search_alg=algo)
            ray.shutdown()
            # # Load the best model checkpoint
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #                                  "model.xgb"))
            # Best trial is chosen by the minimum "eval-logloss" over all
            # reported results of each trial.
            best_trial = analysis.get_best_trial("eval-logloss","min","all")
            accuracy = 1. - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")
def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
                 gpus_per_trial=2):
    """Tune ``train_cifar`` with the chosen search method and test the best model.

    Args:
        method: Search method name. Handled values are 'BlendSearch',
            'ASHA', 'BOHB', 'Optuna', 'CFO' and 'Nevergrad'.
        num_samples: Number of configurations passed to ``tune.run``.
        max_num_epochs: Upper limit used for the ``num_epochs`` search
            dimension and for the scheduler's ``max_t``/``max_resource``.
        gpus_per_trial: GPUs requested per trial; values above 1 also wrap
            the restored model in ``nn.DataParallel`` when CUDA is available.
    """
    data_dir = os.path.abspath("test/data")
    # Download data for all trials before starting the run
    load_data(data_dir)

    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune

    # The search spaces for the two groups of methods are identical except
    # that every upper bound is one larger for the second group.
    upper_shift = 0 if method in ['BlendSearch', 'BOHB', 'Optuna'] else 1
    config = {
        "l1": tune.randint(2, 8 + upper_shift),
        "l2": tune.randint(2, 8 + upper_shift),
        "lr": tune.loguniform(1e-4, 1e-1),
        "num_epochs": tune.qloguniform(1, max_num_epochs + upper_shift, q=1),
        # an alternative would be tune.choice([2, 4, 8, 16])
        "batch_size": tune.randint(1, 4 + upper_shift),
    }

    import ray
    time_budget_s = 3600
    start_time = time.time()
    trial_resources = {"cpu": 2, "gpu": gpus_per_trial}

    if method == 'BlendSearch':
        result = tune.run(
            ray.tune.with_parameters(train_cifar, data_dir=data_dir),
            init_config={
                "l1": 2,
                "l2": 2,
                "num_epochs": 1,
                "batch_size": 4,
            },
            metric="loss",
            mode="min",
            max_resource=max_num_epochs,
            min_resource=1,
            report_intermediate_result=True,
            resources_per_trial=trial_resources,
            config=config,
            local_dir='logs/',
            num_samples=num_samples,
            time_budget_s=time_budget_s,
            use_ray=True)
    else:
        if method == 'BOHB':
            # BOHB brings its own HyperBand variant as scheduler.
            from ray.tune.schedulers import HyperBandForBOHB
            from ray.tune.suggest.bohb import TuneBOHB
            algo = TuneBOHB()
            scheduler = HyperBandForBOHB(max_t=max_num_epochs)
        else:
            if method == 'ASHA':
                algo = None
            elif method == 'Optuna':
                from ray.tune.suggest.optuna import OptunaSearch
                algo = OptunaSearch()
            elif method == 'CFO':
                from flaml import CFO
                algo = CFO(points_to_evaluate=[{
                    "l1": 2,
                    "l2": 2,
                    "num_epochs": 1,
                    "batch_size": 4,
                }])
            elif method == 'Nevergrad':
                from ray.tune.suggest.nevergrad import NevergradSearch
                import nevergrad as ng
                algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
            # All non-BOHB methods are scheduled with ASHA.
            from ray.tune.schedulers import ASHAScheduler
            scheduler = ASHAScheduler(max_t=max_num_epochs, grace_period=1)

        result = tune.run(
            tune.with_parameters(train_cifar, data_dir=data_dir),
            resources_per_trial=trial_resources,
            config=config,
            metric="loss",
            mode="min",
            num_samples=num_samples,
            time_budget_s=time_budget_s,
            scheduler=scheduler,
            search_alg=algo)
    ray.shutdown()

    logger.info(f"method={method}")
    logger.info(f"n_samples={num_samples}")
    logger.info(f"time={time.time()-start_time}")

    best_trial = result.get_best_trial("loss", "min", "all")
    best_metrics = best_trial.metric_analysis
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation loss: {}".format(
        best_metrics["loss"]["min"]))
    logger.info("Best trial final validation accuracy: {}".format(
        best_metrics["accuracy"]["max"]))

    # Rebuild the network with the layer sizes of the best trial
    # (the searched values are used as exponents of two).
    best_trained_model = Net(2**best_trial.config["l1"],
                             2**best_trial.config["l2"])
    cuda_available = torch.cuda.is_available()
    device = "cuda:0" if cuda_available else "cpu"
    if cuda_available and gpus_per_trial > 1:
        best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    # Restore the weights saved by the best trial and score on the test set.
    checkpoint_path = os.path.join(best_trial.checkpoint.value, "checkpoint")
    model_state, optimizer_state = torch.load(checkpoint_path)
    best_trained_model.load_state_dict(model_state)

    test_acc = _test_accuracy(best_trained_model, device)
    logger.info("Best trial test set accuracy: {}".format(test_acc))
def __init__(self,
             estimator,
             param_distributions,
             early_stopping=None,
             n_trials=10,
             scoring=None,
             n_jobs=None,
             sk_n_jobs=-1,
             refit=True,
             cv=None,
             verbose=0,
             random_state=None,
             error_score=np.nan,
             return_train_score=False,
             local_dir="~/ray_results",
             max_iters=1,
             search_optimization="random",
             use_gpu=False,
             **search_kwargs):
    """Validate the search settings and initialise the base search object.

    Args:
        estimator: Forwarded to the base class.
        param_distributions: A dict, a list of dicts (random search only),
            or -- for ``"bohb"`` -- a ``ConfigSpace.ConfigurationSpace``.
        early_stopping: Scheduler spec forwarded to the base class. With
            ``"bohb"`` anything that is not a ``HyperBandForBOHB`` instance
            is replaced by a new one.
        n_trials: Stored as ``self.num_samples``.
        random_state: Only stored for random search; a warning is emitted
            if it is given with any other optimization.
        search_optimization: One of ``"random"``, ``"bayesian"``,
            ``"bohb"`` or ``"hyperopt"`` (case-insensitive).
        **search_kwargs: Extra searcher arguments; rejected for random
            search.
        scoring, n_jobs, sk_n_jobs, refit, cv, verbose, error_score,
        return_train_score, local_dir, max_iters, use_gpu: Forwarded to the
            base class (``n_jobs`` falls back to ``-1`` when falsy).

    Raises:
        ValueError: For an unknown ``search_optimization``, a list of
            distributions with a non-random search, or extra keyword
            arguments with random search.
    """
    search_optimization = search_optimization.lower()
    available_optimizations = [
        "random",
        "bayesian",  # scikit-optimize/SkOpt
        "bohb",
        "hyperopt",
        # "optuna",  # optuna is not yet in stable ray.tune
    ]
    if search_optimization not in available_optimizations:
        raise ValueError("Search optimization must be one of "
                         f"{', '.join(available_optimizations)}")

    is_random = search_optimization == "random"
    is_bohb = search_optimization == "bohb"

    if not is_random and random_state is not None:
        warnings.warn(
            "random state is ignored when not using Random optimization")

    self._try_import_required_libraries(search_optimization)

    if isinstance(param_distributions, list) and not is_random:
        raise ValueError("list of dictionaries for parameters "
                         "is not supported for non-random search")

    # Normalise a single dict to a one-element list for validation.
    check_param_distributions = (
        [param_distributions]
        if isinstance(param_distributions, dict) else param_distributions)

    # A ConfigSpace object (BOHB only) is accepted without per-entry checks.
    can_use_param_distributions = False
    if is_bohb:
        import ConfigSpace as CS
        can_use_param_distributions = isinstance(check_param_distributions,
                                                 CS.ConfigurationSpace)

    if not can_use_param_distributions:
        for space in check_param_distributions:
            for dist in space.values():
                _check_distribution(dist, search_optimization)

    if is_bohb:
        from ray.tune.schedulers import HyperBandForBOHB
        if not isinstance(early_stopping, HyperBandForBOHB):
            early_stopping = HyperBandForBOHB(metric="average_test_score",
                                              max_t=max_iters)

    super(TuneSearchCV, self).__init__(
        estimator=estimator,
        early_stopping=early_stopping,
        scoring=scoring,
        n_jobs=n_jobs or -1,
        sk_n_jobs=sk_n_jobs,
        cv=cv,
        verbose=verbose,
        refit=refit,
        error_score=error_score,
        return_train_score=return_train_score,
        local_dir=local_dir,
        max_iters=max_iters,
        use_gpu=use_gpu)

    self.param_distributions = param_distributions
    self.num_samples = n_trials
    if is_random:
        self.random_state = random_state
        if search_kwargs:
            raise ValueError("Random search does not support "
                             f"extra args: {search_kwargs}")
    self.search_optimization = search_optimization
    self.search_kwargs = search_kwargs
def hparams(algorithm, scheduler, num_samples, tensorboard, bare):
    """Run a Ray Tune hyperparameter search and print the best configuration.

    Builds a search algorithm chosen by ``algorithm`` and a trial scheduler
    chosen by ``scheduler``, runs ``tune.run`` on the nested ``trainable``,
    hands the analysis to ``wandb.sweep`` and prints the configuration with
    the lowest ``mean_loss``.

    Args:
        algorithm: Name of the search algorithm. Values handled here are
            'hyperopt', 'bayesopt', 'nevergrad', 'skopt', 'dragonfly',
            'bohb' and 'zoopt'.
        scheduler: Name of the trial scheduler. Values handled here are
            'fifo', 'pbt', 'ahb'/'asha', 'hb', 'bohb' and 'msr'.
        num_samples: Forwarded to ``tune.run`` as ``num_samples``.
        tensorboard: Stored upper-cased in the ``MLENCRYPT_TB`` environment
            variable; when truthy, each attack run is wrapped in a
            TensorFlow summary writer.
        bare: Stored upper-cased in the ``MLENCRYPT_BARE`` environment
            variable.

    Raises:
        ValueError: If both ``tensorboard`` and ``bare`` stringify to
            ``'TRUE'`` after upper-casing.

    NOTE(review): neither if/elif chain below has an ``else`` branch, so an
    unhandled ``algorithm`` or ``scheduler`` value leaves ``algo`` or
    ``sched`` unassigned when ``tune.run`` is called -- confirm callers
    restrict the inputs.
    """
    from glob import glob

    import tensorflow.summary
    from tensorflow import random as tfrandom, int64 as tfint64
    from ray import init as init_ray, shutdown as shutdown_ray
    from ray import tune
    from wandb.ray import WandbLogger
    from wandb import sweep as wandbsweep
    from wandb.apis import CommError as wandbCommError

    # less summaries are logged if MLENCRYPT_TB is TRUE (for efficiency)
    # TODO: use tf.summary.record_if?
    environ["MLENCRYPT_TB"] = str(tensorboard).upper()
    environ["MLENCRYPT_BARE"] = str(bare).upper()
    if getenv('MLENCRYPT_TB', 'FALSE') == 'TRUE' and \
            getenv('MLENCRYPT_BARE', 'FALSE') == 'TRUE':
        raise ValueError('TensorBoard logging cannot be enabled in bare mode.')

    # One log directory per invocation, keyed by the current timestamp.
    logdir = f'logs/hparams/{datetime.now()}'

    # "These results show that K = 3 is the optimal choice for the
    # cryptographic application of neural synchronization. K = 1 and K = 2 are
    # too insecure in regard to the geometric attack. And for K > 3 the effort
    # of A and B grows exponentially with increasing L, while the simple attack
    # is quite successful in the limit K -> infinity. Consequently, one should
    # only use Tree Parity Machines with three hidden units for the neural
    # key-exchange protocol." (Ruttor, 2006)
    # https://arxiv.org/pdf/0711.2411.pdf#page=59

    update_rules = [
        'random-same',
        # 'random-different-A-B-E', 'random-different-A-B',
        'hebbian',
        'anti_hebbian',
        'random_walk'
    ]
    K_bounds = {'min': 4, 'max': 8}
    N_bounds = {'min': 4, 'max': 8}
    L_bounds = {'min': 4, 'max': 8}

    # TODO: don't use *_bounds.values() since .values doesn't preserve order

    def get_session_num(logdir):
        """Return the index for the next ``run-<n>`` directory under ``logdir``.

        NOTE(review): ``glob`` does not promise any ordering, so
        ``current_runs[-1]`` is not necessarily the highest-numbered run --
        confirm whether sorting is needed.
        """
        current_runs = glob(join(logdir, "run-*"))
        if current_runs:
            last_run_path = current_runs[-1]
            # The run number is whatever follows the final '-' in the path.
            last_run_session_num = int(last_run_path.split('-')[-1])
            return last_run_session_num + 1
        else:  # there are no runs yet, start at 0
            return 0

    def trainable(config, reporter):
        """Call ``run`` once per attack type and report the averaged metrics.

        Args:
            config (dict): Parameters provided from the search algorithm or
                variant generation. The keys 'update_rule', 'K', 'N' and
                'L' are read.
            reporter: Callable invoked once, with the averaged metrics and
                ``done=True``.
        """
        # Some searchers hand back an index into update_rules instead of
        # the rule name itself.
        if not isinstance(config['update_rule'], str):
            update_rule = update_rules[int(config['update_rule'])]
        else:
            update_rule = config['update_rule']
        K, N, L = int(config['K']), int(config['N']), int(config['L'])

        run_name = f"run-{get_session_num(logdir)}"
        run_logdir = join(logdir, run_name)
        # for each attack, the TPMs should start with the same weights
        initial_weights_tensors = get_initial_weights(K, N, L)
        # Per-attack results, keyed by attack name.
        training_steps_ls = {}
        eve_scores_ls = {}
        losses_ls = {}
        # for each attack, the TPMs should use the same inputs
        seed = tfrandom.uniform([],
                                minval=0,
                                maxval=tfint64.max,
                                dtype=tfint64).numpy()
        for attack in ['none', 'geometric']:
            # Fresh variables per attack, built from the shared tensors.
            initial_weights = {
                tpm: weights_tensor_to_variable(weights, tpm)
                for tpm, weights in initial_weights_tensors.items()
            }
            # Re-seed so both attacks see the same random stream.
            tfrandom.set_seed(seed)

            if tensorboard:
                attack_logdir = join(run_logdir, attack)
                attack_writer = tensorflow.summary.create_file_writer(
                    attack_logdir)
                with attack_writer.as_default():
                    training_steps, sync_scores, loss = run(
                        update_rule, K, N, L, attack, initial_weights)
            else:
                training_steps, sync_scores, loss = run(
                    update_rule, K, N, L, attack, initial_weights)
            training_steps_ls[attack] = training_steps
            eve_scores_ls[attack] = sync_scores
            losses_ls[attack] = loss
        # Average each metric across the attacks.
        avg_training_steps = tensorflow.math.reduce_mean(
            list(training_steps_ls.values()))
        avg_eve_score = tensorflow.math.reduce_mean(
            list(eve_scores_ls.values()))
        mean_loss = tensorflow.math.reduce_mean(list(losses_ls.values()))
        reporter(
            avg_training_steps=avg_training_steps.numpy(),
            avg_eve_score=avg_eve_score.numpy(),
            mean_loss=mean_loss.numpy(),
            done=True,
        )

    # --- search algorithm ---------------------------------------------------
    # NOTE(review): several points_to_evaluate entries below use K=3, N=16
    # or L=128, which lie outside the 4..8 bounds declared above -- confirm.
    if algorithm == 'hyperopt':
        from hyperopt import hp as hyperopt
        from hyperopt.pyll.base import scope
        from ray.tune.suggest.hyperopt import HyperOptSearch
        space = {
            'update_rule': hyperopt.choice(
                'update_rule',
                update_rules,
            ),
            'K': scope.int(hyperopt.quniform('K', *K_bounds.values(), q=1)),
            'N': scope.int(hyperopt.quniform('N', *N_bounds.values(), q=1)),
            'L': scope.int(hyperopt.quniform('L', *L_bounds.values(), q=1)),
        }
        algo = HyperOptSearch(
            space,
            metric='mean_loss',
            mode='min',
            points_to_evaluate=[
                {
                    'update_rule': 0,
                    'K': 3,
                    'N': 16,
                    'L': 8
                },
                {
                    'update_rule': 0,
                    'K': 8,
                    'N': 16,
                    'L': 8
                },
                {
                    'update_rule': 0,
                    'K': 8,
                    'N': 16,
                    'L': 128
                },
            ],
        )
    elif algorithm == 'bayesopt':
        from ray.tune.suggest.bayesopt import BayesOptSearch
        # The update rule is searched as a numeric range here; trainable()
        # converts non-string values back into a rule via int() indexing.
        space = {
            'update_rule': (0, len(update_rules)),
            'K': tuple(K_bounds.values()),
            'N': tuple(N_bounds.values()),
            'L': tuple(L_bounds.values()),
        }
        algo = BayesOptSearch(
            space,
            metric="mean_loss",
            mode="min",
            # TODO: what is utility_kwargs for and why is it needed?
            utility_kwargs={
                "kind": "ucb",
                "kappa": 2.5,
                "xi": 0.0
            })
    elif algorithm == 'nevergrad':
        from ray.tune.suggest.nevergrad import NevergradSearch
        from nevergrad import optimizers
        from nevergrad import p as ngp
        algo = NevergradSearch(
            optimizers.TwoPointsDE(
                ngp.Instrumentation(
                    update_rule=ngp.Choice(update_rules),
                    K=ngp.Scalar(lower=K_bounds['min'],
                                 upper=K_bounds['max']).set_integer_casting(),
                    N=ngp.Scalar(lower=N_bounds['min'],
                                 upper=N_bounds['max']).set_integer_casting(),
                    L=ngp.Scalar(lower=L_bounds['min'],
                                 upper=L_bounds['max']).set_integer_casting(),
                )),
            None,  # since the optimizer is already instrumented with kwargs
            metric="mean_loss",
            mode="min")
    elif algorithm == 'skopt':
        from skopt import Optimizer
        from ray.tune.suggest.skopt import SkOptSearch
        optimizer = Optimizer([
            update_rules,
            tuple(K_bounds.values()),
            tuple(N_bounds.values()),
            tuple(L_bounds.values())
        ])
        algo = SkOptSearch(
            optimizer,
            ["update_rule", "K", "N", "L"],
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'dragonfly':
        # TODO: doesn't work
        from ray.tune.suggest.dragonfly import DragonflySearch
        from dragonfly.exd.experiment_caller import EuclideanFunctionCaller
        from dragonfly.opt.gp_bandit import EuclideanGPBandit
        # from dragonfly.exd.experiment_caller import CPFunctionCaller
        # from dragonfly.opt.gp_bandit import CPGPBandit
        from dragonfly import load_config
        domain_config = load_config({
            "domain": [
                {
                    "name": "update_rule",
                    "type": "discrete",
                    "dim": 1,
                    "items": update_rules
                },
                {
                    "name": "K",
                    "type": "int",
                    "min": K_bounds['min'],
                    "max": K_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "N",
                    "type": "int",
                    "min": N_bounds['min'],
                    "max": N_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "L",
                    "type": "int",
                    "min": L_bounds['min'],
                    "max": L_bounds['max'],
                    # "dim": 1
                }
            ]
        })
        func_caller = EuclideanFunctionCaller(
            None, domain_config.domain.list_of_domains[0])
        optimizer = EuclideanGPBandit(func_caller, ask_tell_mode=True)
        algo = DragonflySearch(
            optimizer,
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'bohb':
        from ConfigSpace import ConfigurationSpace
        from ConfigSpace import hyperparameters as CSH
        from ray.tune.suggest.bohb import TuneBOHB
        config_space = ConfigurationSpace()
        config_space.add_hyperparameter(
            CSH.CategoricalHyperparameter("update_rule",
                                          choices=update_rules))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='K',
                                             lower=K_bounds['min'],
                                             upper=K_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='N',
                                             lower=N_bounds['min'],
                                             upper=N_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='L',
                                             lower=L_bounds['min'],
                                             upper=L_bounds['max']))
        algo = TuneBOHB(config_space, metric="mean_loss", mode="min")
    elif algorithm == 'zoopt':
        from ray.tune.suggest.zoopt import ZOOptSearch
        from zoopt import ValueType
        space = {
            "update_rule":
            (ValueType.DISCRETE, range(0, len(update_rules)), False),
            "K": (ValueType.DISCRETE,
                  range(K_bounds['min'], K_bounds['max'] + 1), True),
            "N": (ValueType.DISCRETE,
                  range(N_bounds['min'], N_bounds['max'] + 1), True),
            "L": (ValueType.DISCRETE,
                  range(L_bounds['min'], L_bounds['max'] + 1), True),
        }
        # TODO: change budget to a large value
        algo = ZOOptSearch(budget=10,
                           dim_dict=space,
                           metric="mean_loss",
                           mode="min")

    # --- trial scheduler ----------------------------------------------------
    # TODO: use more appropriate arguments for schedulers:
    # https://docs.ray.io/en/master/tune/api_docs/schedulers.html
    if scheduler == 'fifo':
        sched = None  # Tune defaults to FIFO
    elif scheduler == 'pbt':
        from ray.tune.schedulers import PopulationBasedTraining
        from random import randint
        sched = PopulationBasedTraining(
            metric="mean_loss",
            mode="min",
            hyperparam_mutations={
                "update_rule": update_rules,
                "K": lambda: randint(K_bounds['min'], K_bounds['max']),
                "N": lambda: randint(N_bounds['min'], N_bounds['max']),
                "L": lambda: randint(L_bounds['min'], L_bounds['max']),
            })
    elif scheduler == 'ahb' or scheduler == 'asha':
        # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#asha-tune-schedulers-ashascheduler
        from ray.tune.schedulers import AsyncHyperBandScheduler
        sched = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
    elif scheduler == 'hb':
        from ray.tune.schedulers import HyperBandScheduler
        sched = HyperBandScheduler(metric="mean_loss", mode="min")
    # This branch is only reached when none of the scheduler names above
    # matched, so algorithm == 'bohb' does not override an explicit
    # 'fifo'/'pbt'/'ahb'/'asha'/'hb' choice.
    elif algorithm == 'bohb' or scheduler == 'bohb':
        from ray.tune.schedulers import HyperBandForBOHB
        sched = HyperBandForBOHB(metric="mean_loss", mode="min")
    elif scheduler == 'msr':
        from ray.tune.schedulers import MedianStoppingRule
        sched = MedianStoppingRule(metric="mean_loss", mode="min")

    # Cluster address and password are taken from the environment.
    init_ray(
        address=getenv("ip_head"),
        redis_password=getenv('redis_password'),
    )
    analysis = tune.run(
        trainable,
        name='mlencrypt_research',
        config={
            "monitor": True,
            "env_config": {
                "wandb": {
                    "project": "mlencrypt-research",
                    "sync_tensorboard": True,
                },
            },
        },
        # resources_per_trial={"cpu": 1, "gpu": 3},
        local_dir='./ray_results',
        export_formats=['csv'],  # TODO: add other formats?
        num_samples=num_samples,
        loggers=[
            tune.logger.JsonLogger, tune.logger.CSVLogger,
            tune.logger.TBXLogger, WandbLogger
        ],
        search_alg=algo,
        scheduler=sched,
        queue_trials=True,
    )
    try:
        wandbsweep(analysis)
    except wandbCommError:
        # see https://docs.wandb.com/sweeps/ray-tune#feature-compatibility
        pass
    best_config = analysis.get_best_config(metric='mean_loss', mode='min')
    print(f"Best config: {best_config}")
    shutdown_ray()
def _test_distillbert(method='BlendSearch'):
    """Tune ``train_distilbert`` with the given search method and log the best trial.

    Args:
        method: Name of the search method. Handled values are 'ASHA',
            'BOHB', 'Optuna', 'CFO', 'BlendSearch', 'Dragonfly', 'SkOpt',
            'Nevergrad', 'ZOOpt', 'Ax' and 'HyperOpt'.
            NOTE(review): any other value leaves ``algo`` unassigned and the
            ``ray.tune.run`` call fails with a NameError -- confirm whether
            an explicit error is wanted.
    """
    max_num_epoch = 64
    # Passed unchanged to ray.tune.run; the run is bounded by time_budget_s.
    num_samples = -1
    time_budget_s = 10800

    search_space = {
        # You can mix constants with search space objects.
        "num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
        "learning_rate": flaml.tune.loguniform(1e-6, 1e-4),
        "adam_beta1": flaml.tune.uniform(0.8, 0.99),
        "adam_beta2": flaml.tune.loguniform(98e-2, 9999e-4),
        "adam_epsilon": flaml.tune.loguniform(1e-9, 1e-7),
    }

    start_time = time.time()
    ray.init(num_cpus=4, num_gpus=4)

    # Pick the search algorithm; imports are local so that only the
    # selected optimizer's package needs to be installed.
    if 'ASHA' == method:
        algo = None
    elif 'BOHB' == method:
        from ray.tune.schedulers import HyperBandForBOHB
        # The searcher class is spelled ``TuneBOHB``; the previous
        # lower-case ``tuneBOHB`` import failed with ImportError.
        from ray.tune.suggest.bohb import TuneBOHB
        algo = TuneBOHB(max_concurrent=4)
        scheduler = HyperBandForBOHB(max_t=max_num_epoch)
    elif 'Optuna' == method:
        from ray.tune.suggest.optuna import OptunaSearch
        algo = OptunaSearch()
    elif 'CFO' == method:
        from flaml import CFO
        algo = CFO(points_to_evaluate=[{
            "num_train_epochs": 1,
        }])
    elif 'BlendSearch' == method:
        from flaml import BlendSearch
        algo = BlendSearch(points_to_evaluate=[{
            "num_train_epochs": 1,
        }])
    elif 'Dragonfly' == method:
        from ray.tune.suggest.dragonfly import DragonflySearch
        algo = DragonflySearch()
    elif 'SkOpt' == method:
        from ray.tune.suggest.skopt import SkOptSearch
        algo = SkOptSearch()
    elif 'Nevergrad' == method:
        from ray.tune.suggest.nevergrad import NevergradSearch
        import nevergrad as ng
        algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
    elif 'ZOOpt' == method:
        from ray.tune.suggest.zoopt import ZOOptSearch
        algo = ZOOptSearch(budget=num_samples)
    elif 'Ax' == method:
        from ray.tune.suggest.ax import AxSearch
        algo = AxSearch()
    elif 'HyperOpt' == method:
        from ray.tune.suggest.hyperopt import HyperOptSearch
        algo = HyperOptSearch()

    scheduler = None
    if method != 'BOHB':
        from ray.tune.schedulers import ASHAScheduler
        scheduler = ASHAScheduler(max_t=max_num_epoch, grace_period=1)
    # NOTE(review): this unconditional reset discards whichever scheduler
    # was built above, so every method currently runs without a trial
    # scheduler. Kept as-is to preserve behavior -- confirm it is intended.
    scheduler = None

    analysis = ray.tune.run(
        train_distilbert,
        metric=HP_METRIC,
        mode=MODE,
        # You can add "gpu": 1 to allocate GPUs
        resources_per_trial={"gpu": 1},
        config=search_space,
        local_dir='test/logs/',
        num_samples=num_samples,
        time_budget_s=time_budget_s,
        keep_checkpoints_num=1,
        checkpoint_score_attr=HP_METRIC,
        scheduler=scheduler,
        search_alg=algo)

    ray.shutdown()

    best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
    metric = best_trial.metric_analysis[HP_METRIC][MODE]

    logger.info(f"method={method}")
    logger.info(f"n_trials={len(analysis.trials)}")
    logger.info(f"time={time.time()-start_time}")
    logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
    logger.info(f"Best model parameters: {best_trial.config}")
def tune(args):
    """Run a BOHB hyperparameter search for the configuration named in ``args``.

    Args:
        args: Parsed command-line options. The attributes read here are
            ``config_name``, ``overwrite``, ``small_aoi`` and ``run_single``.

    Raises:
        ValueError: If the tuning directory already exists and
            ``args.overwrite`` is not set.
        SystemExit: With status 0 after a single local test run when
            ``args.run_single`` is set.
    """
    search_space = get_search_space(args.config_name)
    config = get_config(args.config_name)
    store = get_target_path(config, 'hptune')

    # Start from an empty results directory; refuse to clobber an existing
    # one unless explicitly asked to.
    if os.path.isdir(store):
        if not args.overwrite:
            raise ValueError(
                f'The tune directory {store} exists. Set flag "--overwrite" '
                'if you want to overwrite runs - all existing runs will be lost!')
        shutil.rmtree(store)
    os.makedirs(store)

    config.update({
        'is_tune': True,
        'small_aoi': args.small_aoi,
    })

    # Concurrency is limited by whichever resource (CPU or GPU) runs out
    # first given the per-run requirements.
    ngpu = torch.cuda.device_count()
    ncpu = os.cpu_count()
    max_concurrent = int(
        np.min((
            np.floor(ncpu / config['ncpu_per_run']),
            np.floor(ngpu / config['ngpu_per_run'])
        ))
    )

    print(
        '\nTuning hyperparameters;\n'
        f' Available resources: {ngpu} GPUs | {ncpu} CPUs\n'
        f' Number of concurrent runs: {max_concurrent}\n'
    )

    bohb_search = TuneBOHB(
        space=search_space,
        max_concurrent=max_concurrent,
        metric=config['metric'],
        mode='min'
    )

    bohb_scheduler = HyperBandForBOHB(
        time_attr='epoch',
        metric=config['metric'],
        mode='min',
        max_t=config['max_t'],
        reduction_factor=config['halving_factor'])

    if args.run_single:
        # Smoke test: train one sampled configuration locally, then stop.
        logging.warning('Starting test run.')
        e = Emulator(search_space.sample_configuration())
        logging.warning('Starting training loop.')
        e._train()
        logging.warning('Finishing test run.')
        # Integer status: a string argument would be printed to stderr and
        # turned into exit status 1, signalling failure.
        sys.exit(0)

    ray.tune.run(
        Emulator,
        config={'hc_config': config},
        resources_per_trial={
            'cpu': config['ncpu_per_run'],
            'gpu': config['ngpu_per_run']},
        num_samples=config['num_samples'],
        local_dir=store,
        raise_on_failed_trial=True,
        verbose=1,
        with_server=False,
        ray_auto_init=False,
        search_alg=bohb_search,
        scheduler=bohb_scheduler,
        loggers=[JsonLogger, CSVLogger],
        keep_checkpoints_num=1,
        reuse_actors=False,
        stop={'patience_counter': config['patience']}
    )

    summarize_run(store, overwrite=True)
utils_params.save_config(run_paths['path_gin'], gin.config_str()) # setup pipeline ds_train, ds_val, ds_test = load_from_tfrecords() # model model = TransformerS2S() trainer = Trainer(model, ds_train, ds_val, run_paths) for val_accuracy in trainer.train(): tune.report(val_accuracy=val_accuracy) algo = TuneBOHB(max_concurrent=4, metric="val_accuracy", mode="max") bohb = HyperBandForBOHB(time_attr="training_iteration", metric="val_accuracy", mode="max", max_t=100) config_name = 'transformerS2S' if config_name == 'lstm': config = { "LSTM.rnn_units1": tune.randint(16, 64), "LSTM.rnn_units2": tune.randint(8, 32), "LSTM.dense_units": tune.randint(12, 64), "LSTM.dropout_rate": tune.uniform(0, 0.8), "load_from_tfrecords.batch_size": tune.choice([8, 16, 32, 64, 128]) } elif config_name == 'dense': config = {
def __init__(self,
             estimator,
             early_stopping=None,
             scoring=None,
             n_jobs=None,
             cv=5,
             refit=True,
             verbose=0,
             error_score="raise",
             return_train_score=False,
             max_iters=10,
             use_gpu=False):
    """Store the search settings and resolve the early-stopping scheduler.

    Args:
        estimator: Estimator to tune; stored as ``self.estimator``.
        early_stopping: ``None``, the name of a scheduler listed in
            ``TuneBaseSearchCV.defined_schedulers``, or a ``TrialScheduler``
            instance. Only honoured when ``self._can_early_stop()`` is true.
        max_iters: Stored as ``self.max_iters`` when early stopping is
            active; otherwise ``self.max_iters`` is forced to 1.
        scoring, n_jobs, cv, refit, verbose, error_score,
        return_train_score, use_gpu: Stored unchanged on the instance.

    Raises:
        ValueError: If ``early_stopping`` is a string that is not a defined
            scheduler.
        TypeError: If ``early_stopping`` is neither a string nor a
            ``TrialScheduler``.
    """
    self.estimator = estimator

    if early_stopping is None or not self._can_early_stop():
        warnings.warn("Early stopping is not enabled. "
                      "To enable early stopping, pass in a supported "
                      "scheduler from Tune and ensure the estimator "
                      "has `partial_fit`.")
        self.max_iters = 1
        self.early_stopping = None
    else:
        self.max_iters = max_iters
        # Every scheduler is pointed at the cross-validated test score.
        target_metric = "average_test_score"

        if isinstance(early_stopping, str):
            if early_stopping not in TuneBaseSearchCV.defined_schedulers:
                raise ValueError(
                    "{} is not a defined scheduler. "
                    "Check the list of available schedulers.".format(
                        early_stopping))
            if early_stopping == "PopulationBasedTraining":
                self.early_stopping = PopulationBasedTraining(
                    metric=target_metric)
            elif early_stopping == "AsyncHyperBandScheduler":
                self.early_stopping = AsyncHyperBandScheduler(
                    metric=target_metric)
            elif early_stopping == "HyperBandScheduler":
                self.early_stopping = HyperBandScheduler(
                    metric=target_metric)
            elif early_stopping == "HyperBandForBOHB":
                self.early_stopping = HyperBandForBOHB(
                    metric=target_metric)
            elif early_stopping == "MedianStoppingRule":
                self.early_stopping = MedianStoppingRule(
                    metric=target_metric)
            elif early_stopping == "ASHAScheduler":
                self.early_stopping = ASHAScheduler(metric=target_metric)
        elif isinstance(early_stopping, TrialScheduler):
            # Reuse the caller's scheduler, re-targeted at our metric.
            self.early_stopping = early_stopping
            self.early_stopping.metric = target_metric
        else:
            raise TypeError("Scheduler must be a str or tune scheduler")

    # Plain pass-through settings.
    self.cv = cv
    self.scoring = scoring
    self.n_jobs = n_jobs
    self.refit = refit
    self.verbose = verbose
    self.error_score = error_score
    self.return_train_score = return_train_score
    self.use_gpu = use_gpu
# from ray.tune.utils import validate_save_restore # validate_save_restore(Worker) # client = MlflowClient(tracking_uri='localhost:5000') experiment_name = f'dx7-vae-{postfix}' #+experiment_name_creator() # experiment_id = client.create_experiment(experiment_name) experiment_metrics = dict(metric="loss/accuracy", mode="max") config_space = CS.ConfigurationSpace() [ config_space.add_hyperparameter( CS.UniformFloatHyperparameter(f'VOICE..{key}', lower=0., upper=1)) for key in VOICE_KEYS ] bohb_hyperband = HyperBandForBOHB(time_attr="training_iteration", max_t=16, **experiment_metrics) bohb_search = TuneBOHB(config_space, max_concurrent=1, **experiment_metrics) tune.run( Worker, config={ 'config_generator': config, 'experiment_name': experiment_name, 'points_per_epoch': 2 }, trial_name_creator=trial_name_creator, resources_per_trial={'gpu': 1}, checkpoint_freq=2,
def _tokens_config_space(hp):
    """Return the tokenizer sub-space shared by the awd_rnn, vdcnn and bert spaces."""
    return hp.choice('tokens_config', [{
        'allocate|tokenizer': 'standard_tokenizer',
        'nested|tokenization_method': hp.choice('tokenization_method', [{
            'allocate|tokenization': 'char'
        }, {
            'allocate|tokenization': 'word',
            'allocate|per_class_vocab_size': hp.uniform('per_class_vocab_size', 1000, 10000)
        }])
    }, {
        'allocate|tokenizer': 'youtokentome',
        'allocate|vocab_size': hp.uniform('vocab_size', 50, 50000)
    }])


def _hyperopt_space(model):
    """Return the HyperOpt search space for ``model``.

    Raises:
        NotImplementedError: If no space is defined for ``model``.
    """
    from hyperopt import hp

    if model == 'simple_lstm':
        return {
            "allocate|hidden_size": hp.quniform("hidden_size", 32, 700, 2),
            "allocate|embedding_size": hp.quniform("embedding_size", 32, 700, 2),
            "allocate|bidirectional": hp.choice("bidirectional", [True, False]),
            "allocate|num_layers": hp.quniform("num_layers", 1, 5, 1),
            "allocate|penalize_all_steps": hp.choice("penalize_all_steps", [True, False]),
        }
    if model == 'awd_rnn':
        return {
            "allocate|hidden_size": hp.quniform("hidden_size", 32, 1024, 4),
            "allocate|embedding_size": hp.quniform("embedding_size", 32, 1024, 4),
            "allocate|num_layers": hp.quniform("num_layers", 1, 6, 1),
            "allocate|penalize_all_steps": hp.choice("penalize_all_steps", [True, False]),
            "allocate|dropouto": hp.normal("dropouto", 0.3, 0.2),
            "allocate|dropouth": hp.normal("dropouth", 0.3, 0.2),
            "allocate|dropouti": hp.normal("dropouti", 0.3, 0.2),
            "allocate|dropoute": hp.normal("dropoute", 0.0, 0.13),
            # "allocate|wdrop": hp.normal("wdrop", 0.0, 0.1),
            "allocate|ar_alpha": hp.normal("ar_alpha", 2, 3),
            "allocate|weight_decay": hp.lognormal("weight_decay", -13, 5),
            "allocate|lr": hp.lognormal('lr', -6, 1),
            "nested|tokens_config": _tokens_config_space(hp),
        }
    if model == 'vdcnn':
        return {
            "allocate|embedding_size": hp.quniform("embedding_size", 32, 1024, 4),
            "allocate|dropout": hp.normal("dropout", 0.3, 0.2),
            "allocate|apply_shortcut": hp.choice("apply_shortcut", [True, False]),
            "allocate|k": hp.normal("k", 8, 2),
            "allocate|dense_nlayers": hp.normal("dense_nlayers", 3, 1),
            "allocate|dense_nfeatures": hp.normal("dense_nfeatures", 2048, 900),
            "allocate|conv1_nblocks": hp.uniform("conv1_nblocks", 0, 10),
            "allocate|conv2_nblocks": hp.uniform("conv2_nblocks", 0, 10),
            "allocate|conv3_nblocks": hp.uniform("conv3_nblocks", 0, 5),
            "allocate|conv4_nblocks": hp.uniform("conv4_nblocks", 0, 5),
            "allocate|conv0_nfmaps": hp.normal("conv0_nfmaps", 64, 20),
            "allocate|conv1_nfmaps": hp.normal("conv1_nfmaps", 64, 20),
            "allocate|conv2_nfmaps": hp.normal("conv2_nfmaps", 128, 30),
            "allocate|conv3_nfmaps": hp.normal("conv3_nfmaps", 256, 50),
            "allocate|conv4_nfmaps": hp.normal("conv4_nfmaps", 512, 100),
            "allocate|weight_decay": hp.lognormal("weight_decay", -13, 5),
            "allocate|lr": hp.lognormal('lr', -6, 1),
            "nested|tokens_config": _tokens_config_space(hp),
        }
    if model == 'bert':
        return {
            "allocate|hidden_dropout": hp.normal("hidden_dropout", 0.0, 0.2),
            "allocate|att_dropout": hp.normal("att_dropout", 0.0, 0.2),
            "allocate|hidden_size": hp.quniform("hidden_size", 32, 1024, 4),
            "allocate|n_bert_layers": hp.uniform("n_bert_layers", 1, 8),
            "allocate|n_att_heads": hp.uniform("n_att_heads", 1, 8),
            "allocate|intermediate_dense_size": hp.quniform("intermediate_dense_size", 32, 1024, 4),
            "allocate|penalize_all_steps": hp.choice("penalize_all_steps", [True, False]),
            "allocate|weight_decay": hp.lognormal("weight_decay", -13, 5),
            "allocate|lr": hp.lognormal('lr', -6, 1),
            "nested|tokens_config": _tokens_config_space(hp),
        }
    raise NotImplementedError()


def tune_training(config):
    """Launch the Ray Tune run selected by ``config['tune']['tuning_method']``.

    Supported methods:

    * ``'grid'`` -- overwrites ``config['training']['dataset_size']`` with a
      grid search over ``[0.2, 0.4, 0.6, 0.8]`` and runs it.
    * ``'bohb'`` -- ``TuneBOHB`` search driven by a ``HyperBandForBOHB``
      scheduler over a fixed ConfigSpace (one sample).
    * ``'hyperopt'`` -- ``HyperOptSearch`` over the space chosen by
      ``config['model']``. The searcher is saved after every completed trial
      and, when ``config['tune']['resume']`` is truthy, restored first.
    * ``'no_search'`` -- runs ``config`` as given.

    Args:
        config: Experiment dictionary. Reads ``config['experiment_name']``
            and, under ``config['tune']``: ``tuning_method``, ``max_t``,
            ``n_samples``, ``resources_per_trial``, ``working_dir`` and (per
            method) ``discriminating_metric``,
            ``discriminating_metric_mode``, ``resume``.

    Returns:
        The value returned by ``tune.run`` for the selected method.
        (Previously only the ``'bohb'`` method returned it; the other
        methods returned ``None``.)

    Raises:
        NotImplementedError: For an unknown tuning method, or an unknown
            ``config['model']`` with the ``'hyperopt'`` method.
    """
    # `os`, `Fore` and `Style` are expected to be imported at file level.
    from hyper_tune import TuneTrainable
    import ray
    from ray import tune

    ray.init()

    tune_cfg = config['tune']
    stop_dict = {
        'num_examples': tune_cfg['max_t'],
        'no_change_in_accu': 2
    }
    method = tune_cfg['tuning_method']

    def run_with_stop(**extra):
        # Shared tune.run call for every method that honours `stop_dict`.
        return tune.run(TuneTrainable,
                        config=config,
                        num_samples=tune_cfg['n_samples'],
                        name=config['experiment_name'],
                        resume=False,
                        checkpoint_at_end=False,
                        resources_per_trial=tune_cfg['resources_per_trial'],
                        local_dir=tune_cfg['working_dir'],
                        stop=stop_dict,
                        **extra)

    if method == 'grid':
        config['training']['dataset_size'] = tune.grid_search(
            [0.2, 0.4, 0.6, 0.8])
        return run_with_stop()

    if method == 'no_search':
        return run_with_stop()

    if method == 'bohb':
        import ConfigSpace as CS
        import ConfigSpace.hyperparameters as CSH
        from ray.tune.schedulers import HyperBandForBOHB
        # Import the class explicitly: attribute access through the
        # `ray.tune.suggest` package only works if the `bohb` submodule has
        # already been imported somewhere else.
        from ray.tune.suggest.bohb import TuneBOHB

        config_space = CS.ConfigurationSpace(seed=42)
        # replace | convention is a kludge because of BOHB's specialized interface
        config_space.add_hyperparameters([
            CSH.UniformIntegerHyperparameter('replace|num_layers', lower=1, upper=5),
            CSH.UniformIntegerHyperparameter('replace|hidden_size', lower=64, upper=512),
            CSH.UniformIntegerHyperparameter('replace|embedding_size', lower=64, upper=512),
            CSH.UniformFloatHyperparameter('replace|dropout', lower=0.0, upper=0.5),
            CSH.CategoricalHyperparameter('replace|bidirectional', choices=[True, False]),
            CSH.UniformFloatHyperparameter('replace|lr', lower=0.00001, upper=0.1, log=True)
        ])
        bohb_hyperband = HyperBandForBOHB(
            time_attr='num_examples',
            metric=tune_cfg['discriminating_metric'],
            mode=tune_cfg['discriminating_metric_mode'],
            max_t=tune_cfg['max_t'])
        bohb_search = TuneBOHB(
            config_space,
            max_concurrent=1,
            metric=tune_cfg['discriminating_metric'],
            mode=tune_cfg['discriminating_metric_mode'])
        return tune.run(
            TuneTrainable,
            config=config,
            scheduler=bohb_hyperband,
            search_alg=bohb_search,
            num_samples=1,
            name=config['experiment_name'],
            resources_per_trial=tune_cfg['resources_per_trial'],
            local_dir=tune_cfg['working_dir'])

    if method == 'hyperopt':
        from ray.tune.schedulers import FIFOScheduler
        from ray.tune.suggest.hyperopt import HyperOptSearch

        hyper_opt_checkpoint_dir = os.path.join(tune_cfg['working_dir'],
                                                config['experiment_name'],
                                                'hyperopt')

        class HyperOptFIFO(FIFOScheduler):
            """FIFO scheduler that saves the HyperOpt searcher after each trial."""

            def on_trial_complete(self, trial_runner, trial, result):
                algo.save(hyper_opt_checkpoint_dir)
                print(
                    f'{Fore.GREEN} Checkpointing hyperopt ...{Style.RESET_ALL}'
                )
                return super().on_trial_complete(trial_runner, trial, result)

        algo = HyperOptSearch(
            _hyperopt_space(config['model']),
            max_concurrent=1,
            metric=tune_cfg['discriminating_metric'],
            mode=tune_cfg['discriminating_metric_mode'],
            n_initial_points=7,
            random_state_seed=42)

        if tune_cfg['resume']:
            # Best-effort warm start. Catch Exception rather than using a
            # bare `except:` so Ctrl-C / SystemExit still propagate.
            try:
                algo.restore(hyper_opt_checkpoint_dir)
                n_trials = len(algo._hpopt_trials)
            except Exception:
                print(
                    f'{Fore.RED}Unable to load trials. Cold starting ...{Style.RESET_ALL}'
                )
            else:
                print(
                    f"{Fore.GREEN}{n_trials} trials loaded. Warm starting ...{Style.RESET_ALL}"
                )

        return run_with_stop(search_alg=algo, scheduler=HyperOptFIFO())

    raise NotImplementedError()
def _test_xgboost(method="BlendSearch"):
    """Tune ``train_breast_cancer`` with the searcher named by ``method``.

    Returns ``None`` immediately when ``ray`` cannot be imported. Otherwise
    it runs ``tune.run`` minimising ``"eval-logloss"`` and logs the best
    trial's loss, accuracy and configuration.

    Args:
        method: ``"BlendSearch"`` (runs through ``flaml.tune``) or one of
            ``"ASHA"``, ``"BOHB"``, ``"Optuna"``, ``"CFO"``, ``"CFOCat"``,
            ``"Dragonfly"``, ``"SkOpt"``, ``"Nevergrad"``, ``"ZOOpt"``,
            ``"Ax"``, ``"HyperOpt"`` (run through ``ray.tune``).

    Raises:
        ValueError: If ``ray`` is importable and ``method`` is not one of
            the names above.
    """
    try:
        import ray  # noqa: F401 -- availability probe only
    except ImportError:
        return
    if method == "BlendSearch":
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        "max_depth": tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1),
    }
    max_iter = 10
    for num_samples in [128]:
        time_budget_s = 60
        for n_cpu in [2]:
            start_time = time.time()
            # ray.init(address='auto')
            if method == "BlendSearch":
                analysis = tune.run(
                    train_breast_cancer,
                    config=search_space,
                    low_cost_partial_config={
                        "max_depth": 1,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    scheduler="asha",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    local_dir="logs/",
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True,
                )
            else:
                # Only BOHB brings its own scheduler; everything else is
                # paired with ASHA below.
                scheduler = None
                if method == "ASHA":
                    algo = None
                elif method == "BOHB":
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB

                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif method == "Optuna":
                    from ray.tune.suggest.optuna import OptunaSearch

                    algo = OptunaSearch()
                elif method == "CFO":
                    from flaml import CFO

                    algo = CFO(
                        low_cost_partial_config={
                            "max_depth": 1,
                        },
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        },
                    )
                elif method == "CFOCat":
                    from flaml.searcher.cfo_cat import CFOCat

                    algo = CFOCat(
                        low_cost_partial_config={
                            "max_depth": 1,
                        },
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        },
                    )
                elif method == "Dragonfly":
                    from ray.tune.suggest.dragonfly import DragonflySearch

                    algo = DragonflySearch()
                elif method == "SkOpt":
                    from ray.tune.suggest.skopt import SkOptSearch

                    algo = SkOptSearch()
                elif method == "Nevergrad":
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng

                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif method == "ZOOpt":
                    from ray.tune.suggest.zoopt import ZOOptSearch

                    algo = ZOOptSearch(budget=num_samples * n_cpu)
                elif method == "Ax":
                    from ray.tune.suggest.ax import AxSearch

                    algo = AxSearch()
                elif method == "HyperOpt":
                    from ray.tune.suggest.hyperopt import HyperOptSearch

                    algo = HyperOptSearch()
                else:
                    # Previously `algo` stayed unbound and the failure only
                    # surfaced as an UnboundLocalError at tune.run.
                    raise ValueError(f"Unsupported method: {method!r}")
                if scheduler is None:
                    from ray.tune.schedulers import ASHAScheduler

                    scheduler = ASHAScheduler(max_t=max_iter, grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir="logs/",
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    scheduler=scheduler,
                    search_alg=algo,
                )
            # # Load the best model checkpoint
            # import os
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #                                  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1.0 - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")
def _test_roberta(method='BlendSearch'):
    """Tune ``train_roberta`` with the searcher named by ``method`` and log the best trial.

    Starts Ray with 4 CPUs and 4 GPUs, runs ``ray.tune.run`` for up to
    ``time_budget_s`` seconds optimising ``HP_METRIC`` in direction ``MODE``,
    and always shuts Ray down again.

    Args:
        method: One of ``'ASHA'``, ``'BOHB'``, ``'Optuna'``, ``'CFO'``,
            ``'BlendSearch'``, ``'Dragonfly'``, ``'SkOpt'``, ``'Nevergrad'``,
            ``'ZOOpt'``, ``'Ax'``, ``'HyperOpt'``.

    Raises:
        ValueError: If ``method`` is not one of the names above.
    """
    max_num_epoch = 100
    num_samples = -1
    time_budget_s = 3600
    search_space = {
        # You can mix constants with search space objects.
        "num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
        "learning_rate": flaml.tune.loguniform(1e-5, 3e-5),
        "weight_decay": flaml.tune.uniform(0, 0.3),
        "per_device_train_batch_size": flaml.tune.choice([16, 32, 64, 128]),
        "seed": flaml.tune.choice([12, 22, 33, 42]),
    }
    start_time = time.time()
    ray.init(num_cpus=4, num_gpus=4)
    try:
        if method == 'ASHA':
            algo = None
        elif method == 'BOHB':
            from ray.tune.schedulers import HyperBandForBOHB
            # The class is `TuneBOHB`; importing `tuneBOHB` raised ImportError.
            from ray.tune.suggest.bohb import TuneBOHB
            algo = TuneBOHB(max_concurrent=4)
            scheduler = HyperBandForBOHB(max_t=max_num_epoch)
        elif method == 'Optuna':
            from ray.tune.suggest.optuna import OptunaSearch
            algo = OptunaSearch()
        elif method == 'CFO':
            from flaml import CFO
            algo = CFO(points_to_evaluate=[{
                "num_train_epochs": 1,
                "per_device_train_batch_size": 128,
            }])
        elif method == 'BlendSearch':
            from flaml import BlendSearch
            algo = BlendSearch(points_to_evaluate=[{
                "num_train_epochs": 1,
                "per_device_train_batch_size": 128,
            }])
        elif method == 'Dragonfly':
            from ray.tune.suggest.dragonfly import DragonflySearch
            algo = DragonflySearch()
        elif method == 'SkOpt':
            from ray.tune.suggest.skopt import SkOptSearch
            algo = SkOptSearch()
        elif method == 'Nevergrad':
            from ray.tune.suggest.nevergrad import NevergradSearch
            import nevergrad as ng
            algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
        elif method == 'ZOOpt':
            from ray.tune.suggest.zoopt import ZOOptSearch
            # NOTE(review): `num_samples` is -1 here, so the budget is -1.
            algo = ZOOptSearch(budget=num_samples)
        elif method == 'Ax':
            from ray.tune.suggest.ax import AxSearch
            algo = AxSearch(max_concurrent=3)
        elif method == 'HyperOpt':
            from ray.tune.suggest.hyperopt import HyperOptSearch
            algo = HyperOptSearch()
        else:
            # Previously `algo` stayed unbound and the failure only surfaced
            # as an UnboundLocalError at ray.tune.run.
            raise ValueError(f"Unsupported method: {method!r}")
        if method != 'BOHB':
            from ray.tune.schedulers import ASHAScheduler
            scheduler = ASHAScheduler(max_t=max_num_epoch, grace_period=1)
        # NOTE(review): the scheduler built above is discarded, so every
        # method (including BOHB) runs without a scheduler. Kept as in the
        # original -- confirm this override is intentional.
        scheduler = None
        analysis = ray.tune.run(train_roberta,
                                metric=HP_METRIC,
                                mode=MODE,
                                resources_per_trial={
                                    "gpu": 4,
                                    "cpu": 4
                                },
                                config=search_space,
                                local_dir='logs/',
                                num_samples=num_samples,
                                time_budget_s=time_budget_s,
                                keep_checkpoints_num=1,
                                checkpoint_score_attr=HP_METRIC,
                                scheduler=scheduler,
                                search_alg=algo)
    finally:
        # Release the Ray runtime even when searcher setup or the run fails,
        # so a later ray.init() in the same process is not blocked.
        ray.shutdown()

    best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
    metric = best_trial.metric_analysis[HP_METRIC][MODE]
    logger.info(f"method={method}")
    logger.info(f"n_trials={len(analysis.trials)}")
    logger.info(f"time={time.time()-start_time}")
    logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
    logger.info(f"Best model parameters: {best_trial.config}")
def cifar10_main(method="BlendSearch",
                 num_samples=10,
                 max_num_epochs=100,
                 gpus_per_trial=1):
    """Tune ``train_cifar`` with the chosen searcher and evaluate the best trial.

    Downloads the data once via ``load_data``, runs the tuning job for up to
    600 seconds minimising ``"loss"``, shuts Ray down, then rebuilds the best
    trial's ``Net`` from its checkpoint and logs its test-set accuracy.

    Args:
        method: ``"BlendSearch"`` (runs through ``flaml.tune``) or one of
            ``"ASHA"``, ``"BOHB"``, ``"Optuna"``, ``"CFO"``, ``"Nevergrad"``
            (run through ``ray.tune``).
        num_samples: Number of trials passed to ``tune.run``.
        max_num_epochs: Upper bound of the ``num_epochs`` search range and
            the scheduler's maximum resource.
        gpus_per_trial: GPUs requested per trial; values above 1 wrap the
            final model in ``nn.DataParallel`` when CUDA is available.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    supported = ("BlendSearch", "ASHA", "BOHB", "Optuna", "CFO", "Nevergrad")
    if method not in supported:
        # Fail before the data download; previously this only surfaced as
        # an UnboundLocalError on `algo` at tune.run.
        raise ValueError(
            f"Unsupported method: {method!r}; expected one of {supported}")

    data_dir = os.path.abspath("test/data")
    load_data(data_dir)  # Download data for all trials before starting the run
    if method == "BlendSearch":
        from flaml import tune
    else:
        from ray import tune
    if method == "BOHB":
        config = {
            "l1": tune.randint(2, 8),
            "l2": tune.randint(2, 8),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs, q=1),
            "batch_size": tune.randint(1, 4),
        }
    else:
        config = {
            "l1": tune.randint(2, 9),
            "l2": tune.randint(2, 9),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.loguniform(1, max_num_epochs),
            "batch_size": tune.randint(1, 5),
        }
    import ray

    time_budget_s = 600
    np.random.seed(7654321)
    start_time = time.time()
    try:
        if method == "BlendSearch":
            result = tune.run(
                ray.tune.with_parameters(train_cifar, data_dir=data_dir),
                config=config,
                metric="loss",
                mode="min",
                low_cost_partial_config={"num_epochs": 1},
                max_resource=max_num_epochs,
                min_resource=1,
                scheduler="asha",
                resources_per_trial={
                    "cpu": 1,
                    "gpu": gpus_per_trial
                },
                local_dir="logs/",
                num_samples=num_samples,
                time_budget_s=time_budget_s,
                use_ray=True,
            )
        else:
            algo = None  # "ASHA": scheduler only, no search algorithm
            if method == "BOHB":
                from ray.tune.schedulers import HyperBandForBOHB
                from ray.tune.suggest.bohb import TuneBOHB
                algo = TuneBOHB()
                scheduler = HyperBandForBOHB(max_t=max_num_epochs)
            elif method == "Optuna":
                from ray.tune.suggest.optuna import OptunaSearch
                algo = OptunaSearch(seed=10)
            elif method == "CFO":
                from flaml import CFO
                algo = CFO(low_cost_partial_config={
                    "num_epochs": 1,
                })
            elif method == "Nevergrad":
                from ray.tune.suggest.nevergrad import NevergradSearch
                import nevergrad as ng
                algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
            if method != "BOHB":
                from ray.tune.schedulers import ASHAScheduler
                scheduler = ASHAScheduler(max_t=max_num_epochs, grace_period=1)
            result = tune.run(
                tune.with_parameters(train_cifar, data_dir=data_dir),
                resources_per_trial={
                    "cpu": 1,
                    "gpu": gpus_per_trial
                },
                config=config,
                metric="loss",
                mode="min",
                num_samples=num_samples,
                time_budget_s=time_budget_s,
                scheduler=scheduler,
                search_alg=algo,
            )
    finally:
        # Release the Ray runtime even when the tuning run fails.
        ray.shutdown()

    logger.info(f"method={method}")
    logger.info(f"#trials={len(result.trials)}")
    logger.info(f"time={time.time()-start_time}")
    best_trial = result.get_best_trial("loss", "min", "all")
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation loss: {}".format(
        best_trial.metric_analysis["loss"]["min"]))
    logger.info("Best trial final validation accuracy: {}".format(
        best_trial.metric_analysis["accuracy"]["max"]))

    # The search space samples exponents, so layer widths are 2**l1 / 2**l2.
    best_trained_model = Net(2**best_trial.config["l1"],
                             2**best_trial.config["l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    checkpoint_path = os.path.join(best_trial.checkpoint.value, "checkpoint")
    model_state, optimizer_state = torch.load(checkpoint_path)
    best_trained_model.load_state_dict(model_state)

    test_acc = _test_accuracy(best_trained_model, device)
    logger.info("Best trial test set accuracy: {}".format(test_acc))