from ray.tune.schedulers import (
    AsyncHyperBandScheduler,
    HyperBandForBOHB,
    HyperBandScheduler,
    PopulationBasedTraining,
)
from ray.tune.schedulers.pb2 import PB2


def get_raytune_schedule(raytune_cfg):
    if raytune_cfg["sched"] == "asha":
        return AsyncHyperBandScheduler(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["asha"]["max_t"],
            grace_period=raytune_cfg["asha"]["grace_period"],
            reduction_factor=raytune_cfg["asha"]["reduction_factor"],
            brackets=raytune_cfg["asha"]["brackets"],
        )
    elif raytune_cfg["sched"] == "hyperband":
        return HyperBandScheduler(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
        )
    # requires: pip install hpbandster ConfigSpace
    elif raytune_cfg["sched"] in ("bohb", "BOHB"):
        return HyperBandForBOHB(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
        )
    elif raytune_cfg["sched"] in ("pbt", "PBT"):
        return PopulationBasedTraining(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            perturbation_interval=raytune_cfg["pbt"]["perturbation_interval"],
            hyperparam_mutations=raytune_cfg["pbt"]["hyperparam_mutations"],
            log_config=True,
        )
    # requires: pip install GPy scikit-learn
    elif raytune_cfg["sched"] in ("pb2", "PB2"):
        return PB2(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
            perturbation_interval=raytune_cfg["pb2"]["perturbation_interval"],
            hyperparam_bounds=raytune_cfg["pb2"]["hyperparam_bounds"],
            log_config=True,
        )
    else:
        print("INFO: Not using any Ray Tune trial scheduler.")
        return None
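# The keys read above imply a config dict shaped like the following. This is a
# minimal, hedged sketch: only the key names come from get_raytune_schedule;
# the metric name, mode, and bound values are illustrative assumptions.
raytune_cfg = {
    "sched": "pb2",
    "default_metric": "val_loss",
    "default_mode": "min",
    "asha": {"max_t": 100, "grace_period": 10, "reduction_factor": 3, "brackets": 1},
    "hyperband": {"max_t": 100, "reduction_factor": 3},
    "pbt": {"perturbation_interval": 10, "hyperparam_mutations": {"lr": [1e-4, 1e-2]}},
    "pb2": {"perturbation_interval": 10, "hyperparam_bounds": {"lr": [1e-4, 1e-2]}},
}
scheduler = get_raytune_schedule(raytune_cfg)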
import argparse

import ray
from ray import tune
from ray.tune.schedulers.pb2 import PB2

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing"
    )
    args, _ = parser.parse_known_args()

    if args.smoke_test:
        ray.init(num_cpus=2)  # force pausing to happen for the test
    else:
        ray.init()

    pbt = PB2(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=20,
        hyperparam_bounds={
            # Hyperparameter bounds, given as [min, max].
            "lr": [0.0001, 0.02],
        },
    )

    # pbt_function is the toy trainable defined elsewhere in this example
    # (a sketch of it follows this block).
    tune.run(
        pbt_function,
        name="pbt_test",
        scheduler=pbt,
        verbose=False,
        stop={
            "training_iteration": 30,
        },
        num_samples=8,
        fail_fast=True,
    )
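# The trainable `pbt_function` used above is defined elsewhere in the original
# example. Below is a minimal, hypothetical stand-in, not Ray's actual
# implementation: it only needs to read the perturbed "lr" from its config and
# report the "mean_accuracy" metric that the PB2 scheduler optimizes.
import random

from ray import tune


def pbt_function(config):
    accuracy = 0.0
    for _ in range(100):
        # Toy "training" signal: a higher lr moves the metric faster (with noise).
        accuracy += config["lr"] * random.random()
        tune.report(mean_accuracy=accuracy)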
import random

import ray
from ray.tune import run, sample_from
from ray.tune.schedulers.pb2 import PB2

# Create the PB2 scheduler.
pb2_scheduler = PB2(
    time_attr="timesteps_total",
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=50000,
    quantile_fraction=0.25,  # copy bottom % with top % (weights)
    # Specifies the hyperparameter search space as [min, max] bounds.
    hyperparam_bounds={
        "lambda": [0.9, 1.0],
        "clip_param": [0.1, 0.5],
        "lr": [1e-5, 1e-3],
        "train_batch_size": [1000, 60000],
    },
)

# Run the PPO algorithm on BipedalWalker with PB2.
analysis = run(
    "PPO",
    name="ppo_pb2_bipedal",
    scheduler=pb2_scheduler,
    verbose=1,
    num_samples=4,  # population size
    stop={"timesteps_total": 1000000},
    config={
        "env": "BipedalWalker-v2",
        "log_level": "INFO",
    },
)
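# Once the run finishes, the best-performing configuration can be pulled from
# the returned ExperimentAnalysis object. A short follow-up sketch; the
# metric and mode simply mirror the scheduler settings above.
best_config = analysis.get_best_config(metric="episode_reward_mean", mode="max")
print("Best hyperparameters found:", best_config)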
def _pb2_importer(*args, **kwargs):
    # PB2 introduces a GPy dependency which can be expensive, so we import
    # it lazily.
    from ray.tune.schedulers.pb2 import PB2

    return PB2(*args, **kwargs)
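# A sketch of how a lazy importer like this can be used: map scheduler names to
# factories so the heavy import is only resolved when that name is requested.
# The dict and function names below are hypothetical, not Ray's internals.
_SCHEDULER_FACTORIES = {
    "pb2": _pb2_importer,
}


def create_scheduler_by_name(name, **kwargs):
    # The GPy/scikit-learn import cost is only paid if "pb2" is selected.
    return _SCHEDULER_FACTORIES[name](**kwargs)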
def load_checkpoint(self, checkpoint_dir):
    path = os.path.join(checkpoint_dir, "checkpoint")
    checkpoint = torch.load(path)
    self.obj.net.load_state_dict(checkpoint["model"])
    self.obj.optimizer.load_state_dict(checkpoint["optim"])


algo = HyperOptSearch(metric="loss", mode="min")
algo = ConcurrencyLimiter(algo, max_concurrent=4)

scheduler = PB2(
    time_attr="training_iteration",
    perturbation_interval=5,
    # Bounds are given as [min, max].
    hyperparam_bounds={
        "lr": [1e-8, 0.23],
        "weight_decay": [1e-4, 1e-2],
        "drp": [0.05, 0.15],
        "momentum": [1e-2, 0.23],
        "eps": [1e-4, 1e-2],
    },
)


class TestLogger(tune.logger.Logger):
    def _init(self):
        progress_file = os.path.join("", "pb2.csv")  # go up to 9
        self._continuing = os.path.exists(progress_file)
        self._file = open(progress_file, "a")
        self._csv_out = None
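# The load_checkpoint method above reads "model" and "optim" entries, so the
# matching save side presumably writes the same keys. A hedged sketch of what
# that counterpart could look like, assuming the same `os`/`torch` imports and
# the same `self.obj` structure as the excerpt, with the Ray 1.x Trainable
# `save_checkpoint` signature (the enclosing class is not shown here):
def save_checkpoint(self, checkpoint_dir):
    path = os.path.join(checkpoint_dir, "checkpoint")
    torch.save(
        {
            "model": self.obj.net.state_dict(),
            "optim": self.obj.optimizer.state_dict(),
        },
        path,
    )
    return checkpoint_dir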
help="The address of server to connect to if using " "Ray Client.", ) args, _ = parser.parse_known_args() if args.smoke_test: ray.init(num_cpus=2) # force pausing to happen for test else: if args.server_address: ray.init(f"ray://{args.server_address}") else: ray.init() pbt = PB2( perturbation_interval=20, hyperparam_bounds={ # hyperparameter bounds. "lr": [0.0001, 0.02], }, ) analysis = tune.run( pbt_function, name="pbt_test", scheduler=pbt, metric="mean_accuracy", mode="max", verbose=False, stop={ "training_iteration": 30, }, num_samples=8,
    # Specifies the search space for these hyperparameters.
    hyperparam_mutations={
        "lambda": lambda: random.uniform(0.9, 1.0),
        "clip_param": lambda: random.uniform(0.1, 0.5),
        "lr": lambda: random.uniform(1e-5, 1e-3),
        "train_batch_size": lambda: random.randint(1000, 60000),
    },
    custom_explore_fn=explore,
)

pb2 = PB2(
    time_attr=args.criteria,
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=args.t_ready,
    quantile_fraction=args.perturb,  # copy bottom % with top %
    # Specifies the hyperparameter search space as [min, max] bounds.
    hyperparam_bounds={
        "lambda": [0.9, 1.0],
        "clip_param": [0.1, 0.5],
        "lr": [1e-5, 1e-3],
        "train_batch_size": [1000, 60000],
    },
)

# Select the scheduler by name (see the usage sketch after this block).
methods = {"pbt": pbt, "pb2": pb2}

timelog = (
    str(datetime.date(datetime.now())) + "_" + str(datetime.time(datetime.now()))
)

args.dir = "{}_{}_{}_Size{}_{}_{}".format(
    args.algo, args.filename, args.method, str(args.num_samples),
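# A hedged sketch of how the `methods` dict above is presumably consumed later
# in this script: args.method selects which scheduler is passed to tune.run.
# The remaining arguments are placeholders; the full RLlib config is omitted.
analysis = tune.run(
    args.algo,
    name=args.dir,
    scheduler=methods[args.method],
    num_samples=args.num_samples,
)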
sched_asha = ASHAScheduler(
    time_attr="training_iteration",
    max_t=100,
    grace_period=10,
    # mode="max",  # find the maximum; do not define here if it is defined in tune.run
    reduction_factor=3,
    brackets=1,
)

sched_pb2 = PB2(
    time_attr="training_iteration",
    # metric="mean_accuracy",  # defined in tune.run instead
    # mode="max",  # defined in tune.run instead
    perturbation_interval=600.0,
    quantile_fraction=0.25,  # copy bottom % with top %
    # Specifies the hyperparameter search space; PB2 bounds must be numeric
    # [min, max] ranges, so categorical choices cannot go here.
    hyperparam_bounds={
        # "threads": 2,
        "lr": [0.001, 0.1],
        "hidden": [16, 256],
        "dropout": [0.0, 0.2],
        # "activation": ["relu", "elu"],  # categorical; not a valid PB2 bound
        "layers": [1, 3],
    },
)

analysis = tune.run(
    train_mnist,
    name="exp",
    scheduler=sched_pb2,
    # Checkpoint settings
    keep_checkpoints_num=3,
    checkpoint_freq=3,
experiment_name = "HWalk_Low_Mimic_Search_3"
experiment_id = "PPO_HumanoidBulletEnv-v0-Low_4964e_00001_1_2021-05-25_10-04-17"
checkpoint_num = "349"
resume = False

pb2 = PB2(
    time_attr="training_iteration",
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=10,
    quantile_fraction=0.25,
    hyperparam_bounds={
        "lambda": [0.9, 1.0],
        "clip_param": [0.01, 0.5],
        "lr": [1e-6, 1e-3],
        # "num_sgd_iter": [3, 30],
        "train_batch_size": [8192, 40000],
        # "gamma": [0.8, 0.9997],
        # "kl_coeff": [0.3, 1],
        # "vf_loss_coeff": [0.5, 1],
        # "entropy_coeff": [0, 0.01],
    },
)

analysis = tune.run(
    PPOTrainer,
    name="HWalk_Low_Mimic_Search_7",
    resume=False,
    # restore="/home/aditya/ray_results/{}/{}/checkpoint_{}/checkpoint-{}".format(
    #     experiment_name, experiment_id, checkpoint_num, checkpoint_num