def run_exp_1(self):
    """Run a five-sample experiment and save the searcher state.

    NumPy is seeded with 162, the searcher returned by ``set_basic_conf``
    is wrapped in a ``ConcurrencyLimiter`` (limit 1), and after the run the
    wrapped searcher is saved to ``warmStartTest.pkl`` under ``self.tmpdir``.
    The checkpoint path is stored on ``self.log_dir``.

    Returns:
        The object returned by ``tune.run``.
    """
    np.random.seed(162)
    base_searcher, cost = self.set_basic_conf()
    limited_searcher = ConcurrencyLimiter(base_searcher, 1)

    outcome = tune.run(
        cost,
        search_alg=limited_searcher,
        num_samples=5,
        verbose=0,
    )

    self.log_dir = os.path.join(self.tmpdir, "warmStartTest.pkl")
    limited_searcher.save(self.log_dir)
    return outcome
def run_explicit_restore(self, random_state, checkpoint_path):
    """Restore a searcher from ``checkpoint_path`` and run five more samples.

    Args:
        random_state: NumPy global RNG state to install before the run.
        checkpoint_path: File previously written by the searcher's ``save``.

    Returns:
        The object returned by ``tune.run``.
    """
    np.random.set_state(random_state)

    fresh_searcher, cost = self.set_basic_conf()
    restored = ConcurrencyLimiter(fresh_searcher, 1)
    restored.restore(checkpoint_path)

    run_options = dict(
        num_samples=5,
        search_alg=restored,
        scheduler=self.get_scheduler(),
        verbose=0,
    )
    return tune.run(cost, **run_options)
def run_from_experiment_restore(self, random_state):
    """Restore the searcher from the experiment directory and run five samples.

    The searcher state is loaded from ``<tmpdir>/<experiment_name>`` via
    ``restore_from_dir``. ``random_state`` is accepted but not used here.

    Returns:
        The object returned by ``tune.run``.
    """
    fresh_searcher, cost = self.set_basic_conf()
    limited_searcher = ConcurrencyLimiter(fresh_searcher, 1)

    experiment_dir = os.path.join(self.tmpdir, self.experiment_name)
    limited_searcher.restore_from_dir(experiment_dir)

    return tune.run(
        cost,
        num_samples=5,
        search_alg=limited_searcher,
        verbose=0,
        name=self.experiment_name,
        local_dir=self.tmpdir,
    )
def run_part_from_scratch(self):
    """Run five samples from a fresh searcher and checkpoint it.

    Returns:
        A 3-tuple ``(results, numpy_rng_state, checkpoint_path)`` where the
        RNG state is captured after the run and the searcher has been saved.
    """
    np.random.seed(162)
    base_searcher, cost = self.set_basic_conf()
    limited_searcher = ConcurrencyLimiter(base_searcher, 1)

    first_results = tune.run(
        cost,
        num_samples=5,
        search_alg=limited_searcher,
        verbose=0,
        name=self.experiment_name,
        local_dir=self.tmpdir,
    )

    checkpoint_path = os.path.join(self.tmpdir, "warmStartTest.pkl")
    limited_searcher.save(checkpoint_path)

    rng_state = np.random.get_state()
    return first_results, rng_state, checkpoint_path
def run_from_experiment_restore(self, random_state):
    """Restore the searcher from the experiment directory and run five samples.

    The searcher from ``set_basic_conf`` is wrapped in a
    ``ConcurrencyLimiter`` (limit 1) unless it already is one, then restored
    from ``<tmpdir>/<experiment_name>``. ``random_state`` is accepted but not
    used here.

    Returns:
        The object returned by ``tune.run``.
    """
    searcher, cost = self.set_basic_conf()
    already_limited = isinstance(searcher, ConcurrencyLimiter)
    if not already_limited:
        searcher = ConcurrencyLimiter(searcher, 1)

    experiment_dir = os.path.join(self.tmpdir, self.experiment_name)
    searcher.restore_from_dir(experiment_dir)

    return tune.run(
        cost,
        num_samples=5,
        search_alg=searcher,
        scheduler=self.get_scheduler(),
        verbose=0,
        name=self.experiment_name,
        local_dir=self.tmpdir,
        reuse_actors=True,
    )
def run_blendsearch_tune_w_budget(time_budget_s=10):
    """Run BlendSearch on ``easy_objective`` under a time budget.

    The same ``time_budget_s`` is handed both to the searcher (through
    ``set_search_properties``) and to ``tune.run``. The best configuration
    found is printed.
    """
    search_space = {
        "width": tune.uniform(0, 20),
        "height": tune.uniform(-100, 100),
        "activation": tune.choice(["relu", "tanh"]),
    }
    blend = BlendSearch(metric="mean_loss", mode="min", space=search_space)
    blend.set_search_properties(config={"time_budget_s": time_budget_s})
    limited = ConcurrencyLimiter(blend, max_concurrent=4)
    hyperband = AsyncHyperBandScheduler()

    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=limited,
        scheduler=hyperband,
        time_budget_s=time_budget_s,
        num_samples=-1,
        config={"steps": 100},
    )

    print("Best hyperparameters found were: ", analysis.best_config)
def set_algorithm(experiment_name, config):
    """Build the search algorithm selected by ``args.algorithm``.

    Reads the module-level ``args`` and ``best_params``.

    Args:
        experiment_name: Name given to the Ax experiment (used for 'ax' only).
        config: Parameter definition; passed as ``parameters`` to Ax and as
            ``hyperparam_mutations`` to PBT.

    Returns:
        For 'hyperopt', 'ax', 'nevergrad' and 'optuna': the searcher wrapped
        in a ``ConcurrencyLimiter`` capped at ``args.jobs``. For 'pbt' and
        'random': the ``PopulationBasedTraining`` / ``BasicVariantGenerator``
        instance, unwrapped.

    Raises:
        ValueError: If ``args.algorithm`` is not one of the supported names.
            (Previously this surfaced as an ``UnboundLocalError``.)
    """
    name = args.algorithm

    # These two are returned as-is; they are never wrapped in a limiter.
    if name == 'pbt':
        return PopulationBasedTraining(
            time_attr="training_iteration",
            perturbation_interval=args.perturbation,
            hyperparam_mutations=config,
            synch=True)
    if name == 'random':
        return BasicVariantGenerator(max_concurrent=args.jobs)

    if name == 'hyperopt':
        algorithm = HyperOptSearch(points_to_evaluate=best_params)
    elif name == 'ax':
        ax_client = AxClient(enforce_sequential_optimization=False)
        ax_client.create_experiment(name=experiment_name,
                                    parameters=config,
                                    objective_name="minimum",
                                    minimize=True)
        algorithm = AxSearch(ax_client=ax_client,
                             points_to_evaluate=best_params)
    elif name == 'nevergrad':
        algorithm = NevergradSearch(
            points_to_evaluate=best_params,
            optimizer=ng.optimizers.registry["PortfolioDiscreteOnePlusOne"])
    elif name == 'optuna':
        algorithm = OptunaSearch(points_to_evaluate=best_params,
                                 seed=args.seed)
    else:
        # Fail with a readable message instead of an unbound local below.
        raise ValueError(f"Unsupported search algorithm: {name!r}")

    return ConcurrencyLimiter(algorithm, max_concurrent=args.jobs)
def _get_search_algorithm(
        self, search_algorithm, config_space, metric, mode, max_concurrent):
    """Map a search-algorithm name to an ``(algo, scheduler)`` pair.

    Supported names are "BO", "BOHB", "PBT", "GRID" and "RANDOM". Either
    element of the returned pair may be ``None``.

    Raises:
        Exception: If ``search_algorithm`` is not one of the names above.
    """
    if search_algorithm == "BO":
        bayes = BayesOptSearch(
            utility_kwargs={"kind": "ucb", "kappa": 2.5, "xi": 0.0})
        limited = ConcurrencyLimiter(bayes, max_concurrent=max_concurrent)
        return limited, AsyncHyperBandScheduler()

    if search_algorithm == "BOHB":
        bohb = TuneBOHB(
            config_space,
            max_concurrent=max_concurrent,
            metric=metric,
            mode=mode)
        bohb_scheduler = HyperBandForBOHB(
            time_attr="training_iteration", reduction_factor=4)
        return bohb, bohb_scheduler

    if search_algorithm == "PBT":
        # Problem of PBT: It mutates the param value, so sometimes, it
        # generates unacceptable values
        pbt_scheduler = PopulationBasedTraining(
            time_attr='training_iteration',
            # Every N time_attr units, "perturb" the parameters.
            perturbation_interval=2,
            hyperparam_mutations=config_space)
        return None, pbt_scheduler

    if search_algorithm in ("GRID", "RANDOM"):
        return None, None

    raise Exception(search_algorithm, "is not available yet")
def set_basic_conf(self):
    """Build a HEBO searcher and its cost function over width/height.

    Returns:
        ``(search_alg, cost)`` where ``search_alg`` is a ``HEBOSearch``
        wrapped in a ``ConcurrencyLimiter`` with ``max_concurrent=10``.
    """
    width_dim = {"name": "width", "type": "num", "lb": 0, "ub": 20}
    height_dim = {"name": "height", "type": "num", "lb": -100, "ub": 100}
    space = HEBODesignSpace().parse([width_dim, height_dim])

    def cost(param, reporter):
        height_term = (param["height"] - 14)**2
        width_term = abs(param["width"] - 3)
        reporter(loss=height_term - width_term)

    hebo = HEBOSearch(
        space=space, metric="loss", mode="min", random_state_seed=5)
    # This is done on purpose to speed up the test, as HEBO will
    # cache suggestions
    limited = ConcurrencyLimiter(hebo, max_concurrent=10)
    return limited, cost
def test_blendsearch_tune(smoke_test=True):
    """Run BlendSearch through Ray Tune on ``easy_objective``.

    Returns early (after printing a notice) when the Ray Tune imports fail.
    Runs 10 samples when ``smoke_test`` is truthy, otherwise 100, and prints
    the best configuration.
    """
    try:
        from ray import tune
        from ray.tune.suggest import ConcurrencyLimiter
        from ray.tune.schedulers import AsyncHyperBandScheduler
        from ray.tune.suggest.flaml import BlendSearch
    except ImportError:
        print("ray[tune] is not installed, skipping test")
        return
    import numpy as np

    limited = ConcurrencyLimiter(BlendSearch(), max_concurrent=4)
    hyperband = AsyncHyperBandScheduler()
    sample_count = 10 if smoke_test else 100

    tune_config = {
        "steps": 100,
        "width": tune.uniform(0, 20),
        "height": tune.uniform(-100, 100),
        # This is an ignored parameter.
        "activation": tune.choice(["relu", "tanh"]),
        "test4": np.zeros((3, 1)),
    }

    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=limited,
        scheduler=hyperband,
        num_samples=sample_count,
        config=tune_config,
    )

    print("Best hyperparameters found were: ", analysis.best_config)
def set_basic_conf(self):
    """Build a Dragonfly searcher and its cost function over height/width.

    Returns:
        ``(search_alg, cost)`` where ``search_alg`` is a ``DragonflySearch``
        wrapped in a ``ConcurrencyLimiter`` with ``max_concurrent=1000``.
    """
    from dragonfly.opt.gp_bandit import EuclideanGPBandit
    from dragonfly.exd.experiment_caller import EuclideanFunctionCaller
    from dragonfly import load_config

    def cost(space, reporter):
        height, width = space["point"]
        reporter(loss=(height - 14)**2 - abs(width - 3))

    height_var = {"name": "height", "type": "float", "min": -10, "max": 10}
    width_var = {"name": "width", "type": "float", "min": 0, "max": 20}
    domain_config = load_config({"domain": [height_var, width_var]})

    first_domain = domain_config.domain.list_of_domains[0]
    func_caller = EuclideanFunctionCaller(None, first_domain)
    optimizer = EuclideanGPBandit(func_caller, ask_tell_mode=True)

    dragonfly_search = DragonflySearch(
        optimizer, metric="loss", mode="min", random_state_seed=162)
    limited = ConcurrencyLimiter(dragonfly_search, max_concurrent=1000)
    return limited, cost
def test_convergence_gaussian_process(self):
    """Check that BayesOptSearch on ``loss`` produces exactly 41 trials.

    Ray is started in local mode and is always shut down again, including
    when ``tune.run`` raises or the assertion fails, so a failure here does
    not leave a Ray instance running.
    """
    np.random.seed(0)
    ray.init(local_mode=True, num_cpus=1, num_gpus=1)
    try:
        space = {
            "x": (0, 20)  # This is the space of parameters to explore
        }

        resources_per_trial = {"cpu": 1, "gpu": 0}

        # Following bayesian optimization
        gp = BayesOptSearch(
            space, metric="loss", mode="min", random_search_steps=10)
        gp.repeat_float_precision = 5
        gp = ConcurrencyLimiter(gp, 1)

        # Execution of the BO.
        analysis = tune.run(
            loss,
            # stop=EarlyStopping("loss", mode="min", patience=5),
            search_alg=gp,
            config={},
            num_samples=100,  # Number of iterations
            resources_per_trial=resources_per_trial,
            raise_on_failed_trial=False,
            fail_fast=True,
            verbose=1)
        assert len(analysis.trials) == 41
    finally:
        # Previously skipped whenever the run or the assertion failed.
        ray.shutdown()
def test_convergence_gaussian_process(self):
    """Check BayesOptSearch trial count and best ``x`` on ``loss``.

    Asserts the number of trials is one of 13, 40 or 43 and that the best
    ``x`` is within 1e-5 of 0.
    """
    np.random.seed(0)
    ray.init(local_mode=True, num_cpus=1, num_gpus=1)

    # This is the space of parameters to explore
    search_space = {"x": tune.uniform(0, 20)}
    trial_resources = {"cpu": 1, "gpu": 0}

    # Following bayesian optimization
    bayes = BayesOptSearch(random_search_steps=10)
    bayes.repeat_float_precision = 5
    limited = ConcurrencyLimiter(bayes, 1)

    # Execution of the BO.
    analysis = tune.run(
        loss,
        metric="loss",
        mode="min",
        # stop=EarlyStopping("loss", mode="min", patience=5),
        search_alg=limited,
        config=search_space,
        num_samples=100,  # Number of iterations
        resources_per_trial=trial_resources,
        raise_on_failed_trial=False,
        fail_fast=True,
        verbose=1)

    trial_count = len(analysis.trials)
    assert trial_count in {13, 40, 43}  # it is 43 on the cluster?
    assert math.isclose(analysis.best_config["x"], 0, abs_tol=1e-5)
def run_exp_3(self):
    """Run a ten-sample experiment from a freshly configured searcher.

    Returns:
        The object returned by ``tune.run``.
    """
    print("FULL RUN")
    np.random.seed(162)

    base_searcher, cost = self.set_basic_conf()
    limited_searcher = ConcurrencyLimiter(base_searcher, 1)

    run_options = dict(num_samples=10, search_alg=limited_searcher, verbose=0)
    return tune.run(cost, **run_options)
def backtest_tune(ticks: np.ndarray, backtest_config: dict, current_best: Union[dict, list] = None):
    """Tune backtest parameters with a Nevergrad PSO searcher on Ray Tune.

    Args:
        ticks: Tick array; column index 2 of the first and last rows is used
            to compute the covered number of days (divided by ms-per-day).
        backtest_config: Settings dict. Reads 'exchange', 'symbol', 'ranges'
            and 'session_dirpath'; optionally 'iters', 'num_cpus',
            'n_particles' and 'options' (with keys 'c1', 'c2', 'w').
        current_best: Optional starting config, or list of configs, passed to
            the searcher as ``points_to_evaluate`` after cleaning.

    Returns:
        The analysis object returned by ``tune.run``.

    Side effects:
        Starts and shuts down Ray, writes ``results.csv``, prints the best
        config and calls ``plot_wrap``.
    """
    config = create_config(backtest_config)
    n_days = round_((ticks[-1][2] - ticks[0][2]) / (1000 * 60 * 60 * 24), 0.1)
    session_dirpath = make_get_filepath(os.path.join('reports', backtest_config['exchange'], backtest_config['symbol'],
                                                     f"{n_days}_days_{ts_to_date(time())[:19].replace(':', '')}", ''))

    iters = 10
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print('Parameter iters should be defined in the configuration. Defaulting to 10.')

    num_cpus = 2
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print('Parameter num_cpus should be defined in the configuration. Defaulting to 2.')

    n_particles = backtest_config.get('n_particles', 10)

    # PSO coefficients; overridden together when 'options' is present.
    phi1 = 1.4962
    phi2 = 1.4962
    omega = 0.7298
    if 'options' in backtest_config:
        phi1 = backtest_config['options']['c1']
        phi2 = backtest_config['options']['c2']
        omega = backtest_config['options']['w']

    # Normalise a single starting config to a one-element list.
    current_best_params = []
    if current_best:
        candidates = current_best if isinstance(current_best, list) else [current_best]
        current_best_params = [clean_start_config(c, config, backtest_config['ranges']) for c in candidates]

    ray.init(num_cpus=num_cpus, logging_level=logging.FATAL, log_to_driver=False)
    try:
        pso = ng.optimizers.ConfiguredPSO(transform='identity', popsize=n_particles, omega=omega, phip=phi1, phig=phi2)
        algo = NevergradSearch(optimizer=pso, points_to_evaluate=current_best_params)
        algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
        scheduler = AsyncHyperBandScheduler()

        analysis = tune.run(tune.with_parameters(backtest, ticks=ticks), metric='objective', mode='max', name='search',
                            search_alg=algo, scheduler=scheduler, num_samples=iters, config=config, verbose=1,
                            reuse_actors=True, local_dir=session_dirpath,
                            progress_reporter=LogReporter(metric_columns=['daily_gain', 'closest_liquidation', 'objective'],
                                                          parameter_columns=list(backtest_config['ranges'])))
    finally:
        # Shut Ray down even if the search fails, so no instance is left behind.
        ray.shutdown()

    df = analysis.results_df
    df.reset_index(inplace=True)
    df.drop(columns=['trial_id', 'time_this_iter_s', 'done', 'timesteps_total', 'episodes_total', 'training_iteration',
                     'experiment_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip',
                     'time_since_restore', 'timesteps_since_restore', 'iterations_since_restore', 'experiment_tag'],
            inplace=True)
    # NOTE(review): this reads backtest_config['session_dirpath'], not the local
    # session_dirpath computed above — confirm the caller sets that key.
    df.to_csv(os.path.join(backtest_config['session_dirpath'], 'results.csv'), index=False)
    print('Best candidate found:')
    pprint.pprint(analysis.best_config)
    plot_wrap(backtest_config, ticks, clean_result_config(analysis.best_config))
    return analysis
def _test_flaml_raytune_consistency(num_samples=-1,
                                    max_concurrent_trials=1,
                                    searcher_name="cfo"):
    """Check that two tuning runs with the same seed and searcher agree.

    The first run goes through the module-level ``tune.run``; the second goes
    through Ray Tune's ``run`` with the searcher wrapped in a
    ``ConcurrencyLimiter``. Both the best config and the list of configs in
    the results must match. Returns early if Ray Tune cannot be imported.
    """
    try:
        from ray import tune as raytune
    except ImportError:
        print(
            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
        )
        return

    # ---- first run: module-level ``tune`` ----
    np.random.seed(100)
    first_searcher = setup_searcher(searcher_name)
    first_analysis = tune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,  # the search space
        # a initial (partial) config with low cost
        low_cost_partial_config=low_cost_partial_config,
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        # the maximal number of configs to try, -1 means infinite
        num_samples=num_samples,
        time_budget_s=None,  # the time budget in seconds
        local_dir="logs/",  # the local directory to store logs
        search_alg=first_searcher,
        # verbose=0,          # verbosity
        # use_ray=True, # uncomment when performing parallel tuning using ray
    )
    flaml_best_config = first_analysis.best_config
    flaml_config_in_results = [
        entry["config"] for entry in first_analysis.results.values()
    ]
    print(first_analysis.best_trial.last_result)  # the best trial's result
    print("best flaml", searcher_name, flaml_best_config)  # the best config
    print("flaml config in results", searcher_name, flaml_config_in_results)

    # ---- second run: Ray Tune ----
    np.random.seed(100)
    second_searcher = setup_searcher(searcher_name)
    from ray.tune.suggest import ConcurrencyLimiter

    limited = ConcurrencyLimiter(second_searcher, max_concurrent_trials)
    second_analysis = raytune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        # the maximal number of configs to try, -1 means infinite
        num_samples=num_samples,
        local_dir="logs/",  # the local directory to store logs
        # max_concurrent_trials=max_concurrent_trials,
        # resources_per_trial={"cpu": max_concurrent_trials, "gpu": 0},
        search_alg=limited,
    )
    ray_best_config = second_analysis.best_config
    ray_config_in_results = [
        entry["config"] for entry in second_analysis.results.values()
    ]
    print(second_analysis.best_trial.last_result)  # the best trial's result
    print("ray best", searcher_name, second_analysis.best_config)  # the best config
    print("ray config in results", searcher_name, ray_config_in_results)

    assert ray_best_config == flaml_best_config, "best config should be the same"
    assert (flaml_config_in_results == ray_config_in_results
            ), "results from raytune and flaml should be the same"
def run_full(self):
    """Run a ten-sample experiment with the configured scheduler.

    Returns:
        The object returned by ``tune.run``.
    """
    np.random.seed(162)

    base_searcher, cost = self.set_basic_conf()
    limited_searcher = ConcurrencyLimiter(base_searcher, 1)

    run_options = dict(
        num_samples=10,
        search_alg=limited_searcher,
        scheduler=self.get_scheduler(),
        verbose=0,
    )
    return tune.run(cost, **run_options)
def backtest_tune(ohlc: np.ndarray, backtest_config: dict):
    """Tune backtest parameters with HyperOpt on Ray Tune and save the best.

    Args:
        ohlc: Array forwarded to ``backtest`` as its ``ohlc`` argument.
        backtest_config: Settings dict. Reads 'symbol' and 'session_name';
            optionally 'iters' (default 10) and 'num_cpus' (default 2).

    Returns:
        The analysis object returned by ``tune.run``.

    Side effects:
        Creates ``reports/<symbol>`` and ``sessions/<symbol>/<session_name>``,
        starts and shuts down Ray, and writes ``best_config.json`` and
        ``best_trades.csv`` into the session directory.
    """
    config = create_config(backtest_config)
    report_path = os.path.join('reports', backtest_config['symbol'])
    os.makedirs(report_path, exist_ok=True)

    iters = 10
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print(
            'Parameter iters should be defined in the configuration. Defaulting to 10.'
        )

    num_cpus = 2
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print(
            'Parameter num_cpus should be defined in the configuration. Defaulting to 2.'
        )

    # One tenth of the iterations, clamped to the range [1, 20].
    initial_points = max(1, min(int(iters / 10), 20))

    ray.init(num_cpus=num_cpus
             )  # , logging_level=logging.FATAL, log_to_driver=False)
    try:
        algo = HyperOptSearch(n_initial_points=initial_points)
        algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
        scheduler = AsyncHyperBandScheduler()
        analysis = tune.run(tune.with_parameters(backtest, ohlc=ohlc),
                            metric='objective',
                            mode='max',
                            name='search',
                            search_alg=algo,
                            scheduler=scheduler,
                            num_samples=iters,
                            config=config,
                            verbose=1,
                            reuse_actors=True,
                            local_dir=report_path)
    finally:
        # Shut Ray down even if the search fails.
        ray.shutdown()

    session_path = os.path.join('sessions', backtest_config['symbol'],
                                backtest_config['session_name'])
    os.makedirs(session_path, exist_ok=True)

    print('Best candidate found is: ', analysis.best_config)
    # Use a context manager so the file is flushed and closed deterministically.
    with open(os.path.join(session_path, 'best_config.json'), 'w') as config_file:
        json.dump(analysis.best_config, config_file, indent=4)

    result = backtest(analysis.best_config, ohlc, True)
    result.to_csv(os.path.join(session_path, 'best_trades.csv'), index=False)
    return analysis
def optimize_hyperparameters(
    train_model,
    create_model,
    data_train,
    data_test,
    search_space,
    model_kwargs_str,
    callbacks,
    hyperparams_file_name,
    random_seed,
    model_path,
    epochs,
    n_steps,
    num_samples_optim,
):
    """Search hyperparameters with HyperOpt on Ray Tune and save the best.

    Ray is restarted in local mode, ``train_model`` is tuned against
    ``val_loss`` (minimised) with an AsyncHyperBand scheduler, and the best
    configuration is written as JSON to
    ``<model_path>/<hyperparams_file_name>``.

    Tune's logs go to a temporary directory under the current working
    directory, which is removed when the search finishes or fails. The
    original passed the ``TemporaryDirectory`` object itself to
    ``shutil.rmtree``, which expects a path and raised before the results
    could be written.

    Args:
        train_model: Trainable passed to ``tune.with_parameters``.
        create_model, data_train, data_test, model_kwargs_str, callbacks,
            epochs, n_steps: Forwarded unchanged to ``train_model``.
        search_space: Tune ``config`` search space.
        hyperparams_file_name: Output JSON file name.
        random_seed: Seed for ``HyperOptSearch``.
        model_path: Directory the JSON file is written into.
        num_samples_optim: Number of samples for ``tune.run``.
    """
    ray.shutdown()
    ray.init(log_to_driver=False, local_mode=True)

    search_alg = HyperOptSearch(random_state_seed=random_seed)
    search_alg = ConcurrencyLimiter(search_alg, max_concurrent=1)
    scheduler = AsyncHyperBandScheduler(time_attr="training_iteration",
                                        grace_period=10)

    trainable = tune.with_parameters(
        train_model,
        data_train=data_train,
        data_test=data_test,
        create_model=create_model,
        model_kwargs_str=model_kwargs_str,
        callbacks=callbacks,
        epochs=epochs,
        n_steps=n_steps,
    )

    # The context manager removes the directory on exit, success or failure.
    with tempfile.TemporaryDirectory(dir=os.getcwd()) as tmp_dir:
        analysis = tune.run(
            trainable,
            verbose=1,
            config=search_space,
            search_alg=search_alg,
            scheduler=scheduler,
            resources_per_trial={
                "cpu": os.cpu_count(),
                "gpu": 0
            },
            metric="val_loss",
            mode="min",
            name="ray_tune_keras_hyperopt_gru",
            local_dir=tmp_dir,
            num_samples=num_samples_optim,
        )
        # Read the result while the log directory still exists.
        best_params = analysis.get_best_config(metric="val_loss", mode="min")

    with open(os.path.join(model_path, hyperparams_file_name), "w") as f:
        json.dump(best_params, f)
def testBootStrapAnalysis(self):
    """Run ten samples with a searcher configured from a prior analysis.

    The analysis from ``run_full`` is passed to ``set_basic_conf``; the
    resulting searcher is wrapped in a ``ConcurrencyLimiter`` (limit 1)
    unless it already is one.
    """
    prior_analysis = self.run_full()
    searcher, cost = self.set_basic_conf(prior_analysis)

    needs_limiter = not isinstance(searcher, ConcurrencyLimiter)
    if needs_limiter:
        searcher = ConcurrencyLimiter(searcher, 1)

    tune.run(
        cost,
        num_samples=10,
        search_alg=searcher,
        verbose=0,
        reuse_actors=True,
    )
def backtest_tune(ticks: np.ndarray, backtest_config: dict, current_best: Union[dict, list] = None):
    """Tune backtest parameters with a Nevergrad PSO searcher on Ray Tune.

    Args:
        ticks: Tick array; column index 2 of the first and last rows is used
            to compute the covered number of days (divided by ms-per-day).
        backtest_config: Settings dict. Reads 'exchange', 'symbol' and
            'ranges'; optionally 'iters', 'num_cpus', 'n_particles' and
            'options' (with keys 'c1', 'c2', 'w').
        current_best: Optional starting config, or list of configs, passed to
            the searcher as ``points_to_evaluate`` after cleaning.

    Returns:
        The analysis object returned by ``tune.run``.
    """
    config = create_config(backtest_config)
    n_days = round_((ticks[-1][2] - ticks[0][2]) / (1000 * 60 * 60 * 24), 0.1)
    session_dirpath = make_get_filepath(os.path.join('reports', backtest_config['exchange'], backtest_config['symbol'],
                                                     f"{n_days}_days_{ts_to_date(time())[:19].replace(':', '')}", ''))

    iters = 10
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print('Parameter iters should be defined in the configuration. Defaulting to 10.')

    num_cpus = 2
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print('Parameter num_cpus should be defined in the configuration. Defaulting to 2.')

    n_particles = backtest_config.get('n_particles', 10)

    # PSO coefficients; overridden together when 'options' is present.
    phi1 = 1.4962
    phi2 = 1.4962
    omega = 0.7298
    if 'options' in backtest_config:
        phi1 = backtest_config['options']['c1']
        phi2 = backtest_config['options']['c2']
        omega = backtest_config['options']['w']

    # Normalise a single starting config to a one-element list.
    current_best_params = []
    if current_best:
        candidates = current_best if isinstance(current_best, list) else [current_best]
        current_best_params = [clean_start_config(c, config, backtest_config['ranges']) for c in candidates]

    # Only numeric (Float/Integer) search dimensions are shown in the progress table.
    numeric_domains = (ray.tune.sample.Float, ray.tune.sample.Integer)

    ray.init(num_cpus=num_cpus, logging_level=logging.FATAL, log_to_driver=False)
    try:
        pso = ng.optimizers.ConfiguredPSO(transform='identity', popsize=n_particles, omega=omega, phip=phi1, phig=phi2)
        algo = NevergradSearch(optimizer=pso, points_to_evaluate=current_best_params)
        algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
        scheduler = AsyncHyperBandScheduler()

        analysis = tune.run(tune.with_parameters(wrap_backtest, ticks=ticks), metric='objective', mode='max', name='search',
                            search_alg=algo, scheduler=scheduler, num_samples=iters, config=config, verbose=1,
                            reuse_actors=True, local_dir=session_dirpath,
                            progress_reporter=LogReporter(metric_columns=['daily_gain',
                                                                          'closest_liquidation',
                                                                          'max_hours_between_fills',
                                                                          'objective'],
                                                          parameter_columns=[k for k in backtest_config['ranges']
                                                                             if isinstance(config[k], numeric_domains)]))
    finally:
        # Shut Ray down even if the search fails, so no instance is left behind.
        ray.shutdown()
    return analysis
def run_full(self):
    """Run a ten-sample experiment with the configured scheduler.

    The searcher from ``set_basic_conf`` is wrapped in a
    ``ConcurrencyLimiter`` (limit 1) unless it already is one.

    Returns:
        The object returned by ``tune.run``.
    """
    np.random.seed(162)

    searcher, cost = self.set_basic_conf()
    needs_limiter = not isinstance(searcher, ConcurrencyLimiter)
    if needs_limiter:
        searcher = ConcurrencyLimiter(searcher, 1)

    return tune.run(
        cost,
        num_samples=10,
        search_alg=searcher,
        scheduler=self.get_scheduler(),
        verbose=0,
        reuse_actors=True,
    )
def run_hyperopt_tune(config_dict=config_space, smoke_test=False):
    """Tune ``easy_objective`` with HyperOpt and print the best config.

    Args:
        config_dict: Search space handed to ``HyperOptSearch``.
        smoke_test: When truthy, run 10 samples instead of 100.
    """
    hyperopt = HyperOptSearch(space=config_dict, metric="mean_loss", mode="min")
    limited = ConcurrencyLimiter(hyperopt, max_concurrent=4)
    hyperband = AsyncHyperBandScheduler()

    if smoke_test:
        sample_count = 10
    else:
        sample_count = 100

    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=limited,
        scheduler=hyperband,
        num_samples=sample_count,
    )

    print("Best hyperparameters found were: ", analysis.best_config)
def testConvergenceBayesOpt(self):
    """Check that BayesOptSearch converges in fewer than 50 trials.

    Also asserts that the best ``x`` is within 1e-5 of 0.
    """
    from ray.tune.suggest.bayesopt import BayesOptSearch

    np.random.seed(0)

    # Following bayesian optimization
    bayes = BayesOptSearch(random_search_steps=10)
    bayes.repeat_float_precision = 5
    limited = ConcurrencyLimiter(bayes, 1)

    analysis = self._testConvergence(limited, patience=100)

    trial_count = len(analysis.trials)
    assert trial_count < 50
    assert math.isclose(analysis.best_config["x"], 0, abs_tol=1e-5)
def set_basic_conf(self):
    """Build a SkOpt searcher seeded with two evaluated points, plus a cost.

    Returns:
        ``(search_alg, cost)`` where ``search_alg`` is a ``SkOptSearch``
        wrapped in a ``ConcurrencyLimiter`` with ``max_concurrent=1000``.
    """
    dimensions = [(0, 20), (-100, 100)]
    optimizer = skopt.Optimizer(dimensions)

    # Points handed to the searcher together with their rewards.
    previously_run_params = [[10, 0], [15, -20]]
    known_rewards = [-189, -1144]

    def cost(space, reporter):
        squared_sum = space["height"]**2 + space["width"]**2
        reporter(loss=squared_sum)

    skopt_search = SkOptSearch(
        optimizer,
        ["width", "height"],
        metric="loss",
        mode="min",
        points_to_evaluate=previously_run_params,
        evaluated_rewards=known_rewards)
    limited = ConcurrencyLimiter(skopt_search, max_concurrent=1000)
    return limited, cost
def set_basic_conf(self):
    """Build a Nevergrad OnePlusOne searcher and its cost function.

    Returns:
        ``(search_alg, cost)`` where ``search_alg`` is a ``NevergradSearch``
        wrapped in a ``ConcurrencyLimiter`` with ``max_concurrent=1000``.
    """
    parameter_names = ["height", "width"]
    instrumentation = 2
    optimizer = optimizerlib.OnePlusOne(instrumentation)

    def cost(space, reporter):
        height_term = (space["height"] - 14)**2
        width_term = abs(space["width"] - 3)
        reporter(loss=height_term - width_term)

    nevergrad_search = NevergradSearch(
        optimizer,
        parameter_names,
        metric="loss",
        mode="min",
    )
    limited = ConcurrencyLimiter(nevergrad_search, max_concurrent=1000)
    return limited, cost
def run_optuna_tune(smoke_test=False):
    """Tune ``easy_objective`` with Optuna on two metrics and print the bests.

    The searcher is given ``loss`` (min) and ``gain`` (max). Runs 10 samples
    when ``smoke_test`` is truthy, otherwise 100.
    """
    optuna_search = OptunaSearch(metric=["loss", "gain"], mode=["min", "max"])
    limited = ConcurrencyLimiter(optuna_search, max_concurrent=4)

    search_space = {
        "steps": 100,
        "width": tune.uniform(0, 20),
        "height": tune.uniform(-100, 100),
        # This is an ignored parameter.
        "activation": tune.choice(["relu", "tanh"])
    }
    sample_count = 10 if smoke_test else 100

    analysis = tune.run(
        easy_objective,
        search_alg=limited,
        num_samples=sample_count,
        config=search_space)

    best_for_loss = analysis.get_best_config("loss", "min")
    print("Best hyperparameters for loss found were: ", best_for_loss)
    best_for_gain = analysis.get_best_config("gain", "max")
    print("Best hyperparameters for gain found were: ", best_for_gain)
def run_optuna_tune(smoke_test=False):
    """Tune ``easy_objective`` with Optuna and print the best config.

    Minimises ``mean_loss`` using an AsyncHyperBand scheduler. Runs 10
    samples when ``smoke_test`` is truthy, otherwise 100.
    """
    limited = ConcurrencyLimiter(OptunaSearch(), max_concurrent=4)
    hyperband = AsyncHyperBandScheduler()

    search_space = {
        "steps": 100,
        "width": tune.uniform(0, 20),
        "height": tune.uniform(-100, 100),
        # This is an ignored parameter.
        "activation": tune.choice(["relu", "tanh"])
    }
    sample_count = 10 if smoke_test else 100

    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=limited,
        scheduler=hyperband,
        num_samples=sample_count,
        config=search_space)

    print("Best hyperparameters found were: ", analysis.best_config)
def set_basic_conf(self):
    """Build a HyperOpt searcher over x/y/z and a sum-of-squares cost.

    Returns:
        ``(search_alg, cost)`` where ``search_alg`` is a ``HyperOptSearch``
        wrapped in a ``ConcurrencyLimiter`` with ``max_concurrent=1000``.
    """
    hyperopt_space = {
        "x": hp.uniform("x", 0, 10),
        "y": hp.uniform("y", -10, 10),
        "z": hp.uniform("z", -10, 0)
    }

    def cost(space, reporter):
        reporter(loss=space["x"]**2 + space["y"]**2 + space["z"]**2)

    hyperopt_search = HyperOptSearch(
        hyperopt_space,
        metric="loss",
        mode="min",
        random_state_seed=5,
        n_initial_points=1,
    )
    limited = ConcurrencyLimiter(hyperopt_search, max_concurrent=1000)
    return limited, cost
def execute(
    self,
    config,
    dataset=None,
    training_set=None,
    validation_set=None,
    test_set=None,
    training_set_metadata=None,
    data_format=None,
    experiment_name="hyperopt",
    model_name="run",
    # model_load_path=None,
    # model_resume_path=None,
    skip_save_training_description=False,
    skip_save_training_statistics=False,
    skip_save_model=False,
    skip_save_progress=False,
    skip_save_log=False,
    skip_save_processed_input=True,
    skip_save_unprocessed_output=False,
    skip_save_predictions=False,
    skip_save_eval_stats=False,
    output_directory="results",
    gpus=None,
    gpu_memory_limit=None,
    allow_parallel_threads=True,
    callbacks=None,
    backend=None,
    random_seed=default_random_seed,
    debug=False,
    **kwargs,
) -> RayTuneResults:
    """Run a hyperparameter search with Ray Tune and collect ordered trials.

    The experiment arguments are bundled into a dict and handed to each trial
    through ``tune.with_parameters``; each trial calls ``self._run_experiment``.
    After ``tune.run`` returns, trials are sorted by ``metric_score`` and
    converted to ``TrialResults``.

    Args:
        config: Experiment config; also hashed to name the registered
            trainable.
        dataset: Dataset or path. A relative local string path is made
            absolute.
        validation_set: Used for the post-hoc evaluation of trials that
            reported training stats but no eval stats.
        output_directory: Passed to Tune as ``local_dir``. When it has a
            remote protocol it is used as the sync ``upload_dir`` instead
            and ``local_dir`` becomes ``None``.
        gpus: Must be ``None``.
        gpu_memory_limit: When ``None`` and the per-trial GPU share is
            strictly between 0 and 1, set to ``self.gpu_resources_per_trial``.
        callbacks: Optional iterable; each callback's ``prepare_ray_tune``
            may rewrap the trial function and extend the Tune config.
        backend: Backend instance, or a string passed to
            ``initialize_backend``.
        **kwargs: Accepted but not read by this method.
        The remaining parameters are forwarded unchanged inside the
        per-trial dict.

    Returns:
        ``RayTuneResults`` holding the ordered trials (empty when no trial
        reported ``metric_score``) and the Tune analysis object.

    Raises:
        ValueError: If ``gpus`` is not ``None``, or if
            ``max_concurrent_trials`` is set while the created searcher is
            already a ``ConcurrencyLimiter``.
        AssertionError: If ``max_concurrent_trials`` is truthy but not > 0.
    """
    # Relative local paths are resolved against the current working directory.
    if isinstance(dataset, str) and not has_remote_protocol(dataset) and not os.path.isabs(dataset):
        dataset = os.path.abspath(dataset)

    if isinstance(backend, str):
        backend = initialize_backend(backend)

    if gpus is not None:
        raise ValueError(
            "Parameter `gpus` is not supported when using Ray Tune. "
            "Configure GPU resources with Ray and set `gpu_resources_per_trial` in your "
            "hyperopt config."
        )

    if gpu_memory_limit is None and 0 < self._gpu_resources_per_trial_non_none < 1:
        # Enforce fractional GPU utilization
        gpu_memory_limit = self.gpu_resources_per_trial

    # Everything a single trial needs; delivered via tune.with_parameters below.
    hyperopt_dict = dict(
        config=config,
        dataset=dataset,
        training_set=training_set,
        validation_set=validation_set,
        test_set=test_set,
        training_set_metadata=training_set_metadata,
        data_format=data_format,
        experiment_name=experiment_name,
        model_name=model_name,
        # model_load_path=model_load_path,
        # model_resume_path=model_resume_path,
        eval_split=self.split,
        skip_save_training_description=skip_save_training_description,
        skip_save_training_statistics=skip_save_training_statistics,
        skip_save_model=skip_save_model,
        skip_save_progress=skip_save_progress,
        skip_save_log=skip_save_log,
        skip_save_processed_input=skip_save_processed_input,
        skip_save_unprocessed_output=skip_save_unprocessed_output,
        skip_save_predictions=skip_save_predictions,
        skip_save_eval_stats=skip_save_eval_stats,
        output_directory=output_directory,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads,
        callbacks=callbacks,
        backend=backend,
        random_seed=random_seed,
        debug=debug,
    )

    mode = "min" if self.goal != MAXIMIZE else "max"
    metric = "metric_score"
    # Build the searcher from the configured dict; no searcher when the dict
    # is absent or lacks a TYPE entry.
    if self.search_alg_dict is not None:
        if TYPE not in self.search_alg_dict:
            logger.warning("WARNING: Kindly set type param for search_alg " "to utilize Tune's Search Algorithms.")
            search_alg = None
        else:
            search_alg_type = self.search_alg_dict[TYPE]
            search_alg = tune.create_searcher(search_alg_type, metric=metric, mode=mode, **self.search_alg_dict)
    else:
        search_alg = None

    # Apply the concurrency cap: directly on BasicVariantGenerator, otherwise
    # by wrapping the searcher in a ConcurrencyLimiter.
    if self.max_concurrent_trials:
        assert (
            self.max_concurrent_trials > 0
        ), f"`max_concurrent_trials` must be greater than 0, got {self.max_concurrent_trials}"
        if isinstance(search_alg, BasicVariantGenerator) or search_alg is None:
            search_alg = BasicVariantGenerator(max_concurrent=self.max_concurrent_trials)
        elif isinstance(search_alg, ConcurrencyLimiter):
            raise ValueError(
                "You have specified `max_concurrent_trials`, but the search "
                "algorithm is already a `ConcurrencyLimiter`. FIX THIS "
                "by setting `max_concurrent_trials=None`."
            )
        else:
            search_alg = ConcurrencyLimiter(search_alg, max_concurrent=self.max_concurrent_trials)

    resources_per_trial = {
        "cpu": self._cpu_resources_per_trial_non_none,
        "gpu": self._gpu_resources_per_trial_non_none,
    }

    # Trial entry point; the callbacks below may replace it with a wrapped version.
    def run_experiment_trial(config, local_hyperopt_dict, checkpoint_dir=None):
        return self._run_experiment(
            config, checkpoint_dir, local_hyperopt_dict, self.decode_ctx, _is_ray_backend(backend)
        )

    tune_config = {}
    tune_callbacks = []
    for callback in callbacks or []:
        run_experiment_trial, tune_config = callback.prepare_ray_tune(
            run_experiment_trial,
            tune_config,
            tune_callbacks,
        )

    if _is_ray_backend(backend):
        # we can't set Trial actor's CPUs to 0 so we just go very low
        # First bundle is the near-zero-CPU one; then one bundle per GPU if
        # any GPUs are requested, otherwise one bundle per CPU.
        resources_per_trial = PlacementGroupFactory(
            [{"CPU": 0.001}] + ([{"CPU": 1, "GPU": 1}] * self._gpu_resources_per_trial_non_none)
            if self._gpu_resources_per_trial_non_none
            else [{"CPU": 0.001}] + [{"CPU": 1}] * self._cpu_resources_per_trial_non_none
        )

    if has_remote_protocol(output_directory):
        # Remote output: upload through Tune's sync config and drop local_dir.
        run_experiment_trial = tune.durable(run_experiment_trial)
        self.sync_config = tune.SyncConfig(sync_to_driver=False, upload_dir=output_directory)
        output_directory = None
    elif self.kubernetes_namespace:
        from ray.tune.integration.kubernetes import NamespacedKubernetesSyncer

        self.sync_config = tune.SyncConfig(sync_to_driver=NamespacedKubernetesSyncer(self.kubernetes_namespace))

    # Register the trainable under a name derived from a hash of the config.
    run_experiment_trial_params = tune.with_parameters(run_experiment_trial, local_hyperopt_dict=hyperopt_dict)
    register_trainable(f"trainable_func_f{hash_dict(config).decode('ascii')}", run_experiment_trial_params)

    analysis = tune.run(
        f"trainable_func_f{hash_dict(config).decode('ascii')}",
        config={
            **self.search_space,
            **tune_config,
        },
        scheduler=self.scheduler,
        search_alg=search_alg,
        num_samples=self.num_samples,
        keep_checkpoints_num=1,
        max_failures=1,  # retry a trial failure once
        resources_per_trial=resources_per_trial,
        time_budget_s=self.time_budget_s,
        sync_config=self.sync_config,
        local_dir=output_directory,
        metric=metric,
        mode=mode,
        trial_name_creator=lambda trial: f"trial_{trial.trial_id}",
        trial_dirname_creator=lambda trial: f"trial_{trial.trial_id}",
        callbacks=tune_callbacks,
    )

    if "metric_score" in analysis.results_df.columns:
        # Best trial first: ascending when minimising, descending when maximising.
        ordered_trials = analysis.results_df.sort_values("metric_score", ascending=self.goal != MAXIMIZE)

        # Catch nans in edge case where the trial doesn't complete
        # NOTE: the loop variable below rebinds the ``kwargs`` parameter name.
        temp_ordered_trials = []
        for kwargs in ordered_trials.to_dict(orient="records"):
            for key in ["parameters", "training_stats", "eval_stats"]:
                if isinstance(kwargs[key], float):
                    kwargs[key] = {}
            temp_ordered_trials.append(kwargs)

        # Trials w/empty eval_stats fields & non-empty training_stats fields ran intermediate
        # tune.report call(s) but were terminated before reporting eval_stats from post-train
        # evaluation (e.g., trial stopped due to time budget or relatively poor performance.)
        # For any such trials, run model evaluation for the best model in that trial & record
        # results in ordered_trials which is returned & is persisted in hyperopt_statistics.json.
        for trial in temp_ordered_trials:
            # NOTE(review): compares against the string "{}", whereas the NaN
            # placeholders above are replaced by an empty dict — confirm which
            # representation these fields hold at this point.
            if trial["eval_stats"] == "{}" and trial["training_stats"] != "{}":
                # Evaluate the best model on the eval_split, which is validation_set
                if validation_set is not None and validation_set.size > 0:
                    trial_path = trial["trial_dir"]
                    best_model_path = self._get_best_model_path(trial_path, analysis)
                    if best_model_path is not None:
                        self._evaluate_best_model(
                            trial,
                            trial_path,
                            best_model_path,
                            validation_set,
                            data_format,
                            skip_save_unprocessed_output,
                            skip_save_predictions,
                            skip_save_eval_stats,
                            gpus,
                            gpu_memory_limit,
                            allow_parallel_threads,
                            backend,
                            debug,
                        )
                    else:
                        logger.warning("Skipping evaluation as no model checkpoints were available")
                else:
                    logger.warning("Skipping evaluation as no validation set was provided")

        ordered_trials = [TrialResults.from_dict(load_json_values(kwargs)) for kwargs in temp_ordered_trials]
    else:
        logger.warning("No trials reported results; check if time budget lower than epoch latency")
        ordered_trials = []

    return RayTuneResults(ordered_trials=ordered_trials, experiment_analysis=analysis)