def run_experiment(config, trainable):
    """
    Run a single tune experiment in parallel as a "remote" function.

    :param config: The experiment configuration
    :type config: dict
    :param trainable: tune.Trainable class with your experiment
    :type trainable: :class:`ray.tune.Trainable`
    """
    # Stop criteria. Default to total number of iterations/epochs
    stop_criteria = {"training_iteration": config.get("iterations")}
    stop_criteria.update(config.get("stop", {}))

    tune.run(
        trainable,
        name=config["name"],
        local_dir=config["path"],
        stop=stop_criteria,
        config=config,
        num_samples=config.get("repetitions", 1),
        search_alg=config.get("search_alg", None),
        scheduler=config.get("scheduler", None),
        trial_executor=config.get("trial_executor", None),
        checkpoint_at_end=config.get("checkpoint_at_end", False),
        checkpoint_freq=config.get("checkpoint_freq", 0),
        resume=config.get("resume", False),
        reuse_actors=config.get("reuse_actors", False),
        verbose=config.get("verbose", 0),
    )
def run_experiment(config, trainable):
    """
    Run a single tune experiment in parallel as a "remote" function.

    :param config: The experiment configuration
    :type config: dict
    :param trainable: tune.Trainable class with your experiment
    :type trainable: :class:`ray.tune.Trainable`
    """
    # Stop criteria. Default to total number of iterations/epochs
    stop_criteria = {"training_iteration": config.get("iterations")}
    stop_criteria.update(config.get("stop", {}))

    tune.run(
        trainable,
        name=config["name"],
        local_dir=config["path"],
        stop=stop_criteria,
        config=config,
        num_samples=config.get("repetitions", 1),
        search_alg=config.get("search_alg", None),
        scheduler=config.get("scheduler", AsyncHyperBandScheduler(
            reward_attr="mean_accuracy",
            time_attr="training_iteration",
            brackets=2,
            grace_period=max(1, int(config.get("iterations", 10) / 10)),
            reduction_factor=3,
            max_t=config.get("iterations", 10),
        )),
        trial_name_creator=tune.function(trial_name_string),
        trial_executor=config.get("trial_executor", None),
        checkpoint_at_end=config.get("checkpoint_at_end", False),
        checkpoint_freq=config.get("checkpoint_freq", 0),
        upload_dir=config.get("upload_dir", None),
        sync_function=config.get("sync_function", None),
        resume=config.get("resume", False),
        reuse_actors=config.get("reuse_actors", False),
        verbose=config.get("verbose", 0),
        resources_per_trial={
            "cpu": config.get("cpu_percentage", 1.0),
            "gpu": config.get("gpu_percentage", 1.0),
        },
        # added parameters to allow monitoring through REST API
        # with_server=True,
        # server_port=4321,
    )
def run_experiment(config, trainable):
    """
    Run a single tune experiment in parallel as a "remote" function.

    :param config: The experiment configuration
    :type config: dict
    :param trainable: tune.Trainable class with your experiment
    :type trainable: :class:`ray.tune.Trainable`
    """
    # Stop criteria. Default to total number of iterations/epochs
    stop_criteria = {"training_iteration": config.get("iterations")}
    stop_criteria.update(config.get("stop", {}))

    tune.run(
        trainable,
        name=config["name"],
        local_dir=config["path"],
        stop=stop_criteria,
        config=config,
        num_samples=config.get("repetitions", 1),
        search_alg=config.get("search_alg", None),
        scheduler=config.get("scheduler", MedianStoppingRule(
            time_attr="training_iteration",
            reward_attr="noise_accuracy",
            min_samples_required=3,
            grace_period=20,
            verbose=False,
        )),
        trial_name_creator=tune.function(trial_name_string),
        trial_executor=config.get("trial_executor", None),
        checkpoint_at_end=config.get("checkpoint_at_end", False),
        checkpoint_freq=config.get("checkpoint_freq", 0),
        upload_dir=config.get("upload_dir", None),
        sync_function=config.get("sync_function", None),
        resume=config.get("resume", False),
        reuse_actors=config.get("reuse_actors", False),
        verbose=config.get("verbose", 0),
        resources_per_trial={
            # With lots of trials, optimal seems to be 0.5, or 2 trials per GPU.
            # If num trials <= num GPUs, 1.0 is better.
            "cpu": 1,
            "gpu": config.get("gpu_percentage", 0.5),
        },
    )
def run_experiment(config, trainable, num_cpus=1, num_gpus=0):
    """
    Run a single tune experiment in parallel as a "remote" function.

    :param config: The experiment configuration
    :type config: dict
    :param trainable: tune.Trainable class with your experiment
    :type trainable: :class:`ray.tune.Trainable`
    """
    resources_per_trial = {"cpu": num_cpus, "gpu": num_gpus}
    print("experiment =", config["name"])
    print("resources_per_trial =", resources_per_trial)

    # Stop criteria. Default to total number of iterations/epochs
    stop_criteria = {"training_iteration": config.get("iterations")}
    stop_criteria.update(config.get("stop", {}))
    print("stop_criteria =", stop_criteria)

    tune.run(
        trainable,
        name=config["name"],
        stop=stop_criteria,
        config=config,
        resources_per_trial=resources_per_trial,
        num_samples=config.get("repetitions", 1),
        local_dir=config.get("path", None),
        upload_dir=config.get("upload_dir", None),
        sync_function=config.get("sync_function", None),
        checkpoint_freq=config.get("checkpoint_freq", 0),
        checkpoint_at_end=config.get("checkpoint_at_end", False),
        export_formats=config.get("export_formats", None),
        search_alg=config.get("search_alg", None),
        scheduler=config.get("scheduler", None),
        verbose=config.get("verbose", 2),
        resume=config.get("resume", False),
        queue_trials=config.get("queue_trials", False),
        reuse_actors=config.get("reuse_actors", False),
        trial_executor=config.get("trial_executor", None),
        raise_on_failed_trial=config.get("raise_on_failed_trial", True),
    )
def main(
    scenario,
    headless,
    time_total_s,
    seed,
    num_samples,
    num_agents,
    num_workers,
    resume_training,
    result_dir,
    checkpoint_num,
    save_model_path,
):
    pbt = PopulationBasedTraining(
        time_attr="time_total_s",
        metric="episode_reward_mean",
        mode="max",
        perturbation_interval=300,
        resample_probability=0.25,
        # Specifies the mutations of these hyperparams
        hyperparam_mutations={
            "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "rollout_fragment_length": lambda: random.randint(128, 16384),
            "train_batch_size": lambda: random.randint(2000, 160000),
        },
        # Specifies additional mutations after hyperparam_mutations is applied
        custom_explore_fn=explore,
    )

    # XXX: There is a bug in Ray where we can only export a trained model if
    # the policy it's attached to is named 'default_policy'.
    # See: https://github.com/ray-project/ray/issues/5339
    rllib_policies = {
        "default_policy": (
            None,
            rllib_agent["observation_space"],
            rllib_agent["action_space"],
            {
                "model": {
                    "custom_model": TrainingModel.NAME,
                    # "max_seq_len": 50,
                    "custom_model_config": {
                        "num_transformer_units": 1,
                        "attn_dim": 64,
                        "num_heads": 2,
                        "memory_tau": 50,
                        "head_dim": 32,
                        "ff_hidden_dim": 32,
                    },
                }
            },
        )
    }

    smarts.core.seed(seed)

    tune_config = {
        "env": RLlibHiWayEnv,
        "log_level": "WARN",
        "num_workers": num_workers,
        "env_config": {
            "seed": tune.sample_from(lambda spec: random.randint(0, 300)),
            "scenarios": [str(Path(scenario).expanduser().resolve().absolute())],
            "headless": headless,
            "agent_specs": {
                f"AGENT-{i}": rllib_agent["agent_spec"] for i in range(num_agents)
            },
        },
        "multiagent": {"policies": rllib_policies},
        "callbacks": {
            "on_episode_start": on_episode_start,
            "on_episode_step": on_episode_step,
            "on_episode_end": on_episode_end,
        },
    }

    experiment_name = "rllib_example_multi"
    result_dir = Path(result_dir).expanduser().resolve().absolute()
    if checkpoint_num:
        checkpoint = str(
            result_dir / f"checkpoint_{checkpoint_num}" / f"checkpoint-{checkpoint_num}"
        )
    else:
        checkpoint = None

    print(f"Checkpointing at {str(result_dir)}")
    analysis = tune.run(
        "PPO",
        name=experiment_name,
        stop={"time_total_s": time_total_s},
        checkpoint_freq=1,
        checkpoint_at_end=True,
        local_dir=str(result_dir),
        resume=resume_training,
        restore=checkpoint,
        max_failures=30000,
        num_samples=num_samples,
        export_formats=["model", "checkpoint"],
        config=tune_config,
        scheduler=pbt,
    )

    print(analysis.dataframe().head())

    best_logdir = Path(analysis.get_best_logdir("episode_reward_max"))
    model_path = best_logdir / "model"
    copy_tree(str(model_path), save_model_path, overwrite=True)
    print(f"Wrote model to: {save_model_path}")
def train(config, experiments, num_cpus, num_gpus, redis_address, show_list):
    print("config =", config.name)
    print("num_gpus =", num_gpus)
    print("num_cpus =", num_cpus)
    print("redis_address =", redis_address)

    # Use configuration file location as the project location.
    project_dir = os.path.dirname(config.name)
    project_dir = os.path.abspath(project_dir)
    print("project_dir =", project_dir)

    # Load and parse experiment configurations
    configs = parse_config(config, experiments, globals=globals())
    if show_list:
        print("Experiments:", list(configs.keys()))
        return

    # Initialize ray cluster
    if redis_address is not None:
        ray.init(redis_address=redis_address, include_webui=True)
        num_cpus = 1
    else:
        ray.init(num_cpus=num_cpus, num_gpus=num_gpus, local_mode=num_cpus == 1)

    # Run experiments
    resources_per_trial = {"cpu": 1, "gpu": num_gpus / num_cpus}
    print("resources_per_trial =", resources_per_trial)
    for exp in configs:
        print("experiment =", exp)
        config = configs[exp]
        config["name"] = exp

        # Stop criteria. Default to total number of iterations/epochs
        stop_criteria = {"training_iteration": config.get("iterations")}
        stop_criteria.update(config.get("stop", {}))
        print("stop_criteria =", stop_criteria)

        # Make sure local directories are relative to the project location
        path = config.get("path", None)
        if path and not os.path.isabs(path):
            config["path"] = os.path.join(project_dir, path)

        data_dir = config.get("data_dir", "data")
        if not os.path.isabs(data_dir):
            config["data_dir"] = os.path.join(project_dir, data_dir)

        tune.run(
            SpeechExperimentTune,
            name=config["name"],
            stop=stop_criteria,
            config=config,
            resources_per_trial=resources_per_trial,
            num_samples=config.get("repetitions", 1),
            local_dir=config.get("path", None),
            upload_dir=config.get("upload_dir", None),
            sync_function=config.get("sync_function", None),
            checkpoint_freq=config.get("checkpoint_freq", 0),
            checkpoint_at_end=config.get("checkpoint_at_end", False),
            export_formats=config.get("export_formats", None),
            search_alg=config.get("search_alg", None),
            scheduler=config.get("scheduler", None),
            verbose=config.get("verbose", 2),
            resume=config.get("resume", False),
            queue_trials=config.get("queue_trials", False),
            reuse_actors=config.get("reuse_actors", False),
            trial_executor=config.get("trial_executor", None),
            raise_on_failed_trial=config.get("raise_on_failed_trial", True),
        )

    ray.shutdown()
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

tune.run(PPOTrainer, config={"env": "CartPole-v0"})
def main():
    ray.init()
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config', type=str, default='config/global_config.json',
                        help='config file')
    parser.add_argument('--algo', type=str, default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'],
                        help='choose an algorithm')
    parser.add_argument('--inference', action="store_true",
                        help='inference or training')
    parser.add_argument('--ckpt', type=str,
                        help='checkpoint to restore for inference')
    parser.add_argument('--epoch', type=int, default=10,
                        help='number of training epochs')
    parser.add_argument('--num_step', type=int, default=10**3,
                        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--save_freq', type=int, default=100,
                        help='model saving frequency')
    parser.add_argument('--batch_size', type=int, default=1,
                        help='batch size')
    parser.add_argument('--state_time_span', type=int, default=5,
                        help='state interval to receive long term state')
    parser.add_argument('--time_span', type=int, default=30,
                        help='time interval to collect data')
    args = parser.parse_args()

    model_dir = "model/{}_{}".format(args.algo, date)
    result_dir = "result/{}_{}".format(args.algo, date)
    config_env = env_config(args)
    # ray.tune.register_env('gym_cityflow', lambda env_config: CityflowGymEnv(config_env))
    new_config_env = env_config(args, True)
    config_agent = agent_config(config_env)
    new_config_agent = agent_config(new_config_env)

    # build cityflow environment
    tune.run('DQN',
             stop={"training_iteration": 100},
             config=config_agent,
             local_dir='~/ray_results/training/',
             checkpoint_freq=1)
    print('-------------------------------training over----------------------------')
    tune.run('DQN',
             stop={"training_iteration": 1000},
             config=new_config_agent,
             restore='~/ray_results/training/DQN',
             checkpoint_freq=1)
# This dict could mix continuous dimensions and discrete dimensions,
# for example:
dim_dict = {
    # for continuous dimensions: (continuous, search_range, precision)
    "height": (ValueType.CONTINUOUS, [-10, 10], 1e-2),
    # for discrete dimensions: (discrete, search_range, has_order)
    "width": (ValueType.DISCRETE, [-10, 10], False),
}

config = {
    "num_samples": 10 if args.smoke_test else 1000,
    "config": {
        "steps": 10,  # evaluation times
    },
}

zoopt_search = ZOOptSearch(
    algo="Asracos",  # only support ASRacos currently
    budget=config["num_samples"],
    dim_dict=dim_dict,
    metric="mean_loss",
    mode="min")

scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")

tune.run(easy_objective,
         search_alg=zoopt_search,
         name="zoopt_search",
         scheduler=scheduler,
         **config)
# >> lstm_cell_size = config["model"]["lstm_cell_size"]
# >> env = StatelessCartPole()
# >> obs = env.reset()
# >>
# >> # range(2) b/c h- and c-states of the LSTM.
# >> init_state = state = [
# ..     np.zeros([lstm_cell_size], np.float32) for _ in range(2)
# .. ]
# >> prev_a = 0
# >> prev_r = 0.0
# >>
# >> while True:
# >>     a, state_out, _ = trainer.compute_action(
# ..         obs, state, prev_a, prev_r)
# >>     obs, reward, done, _ = env.step(a)
# >>     if done:
# >>         obs = env.reset()
# >>         state = init_state
# >>         prev_a = 0
# >>         prev_r = 0.0
# >>     else:
# >>         state = state_out
# >>         prev_a = a
# >>         prev_r = reward

results = tune.run(args.run, config=config, stop=stop, verbose=2)

if args.as_test:
    check_learning_achieved(results, args.stop_reward)
ray.shutdown()
"max_depth": tune.randint(1, 9) } ray.init(address="auto") ray_params = RayParams( elastic_training=False, max_actor_restarts=2, num_actors=4, cpus_per_actor=1, gpus_per_actor=0) start = time.time() analysis = tune.run( tune.with_parameters(train_wrapper, ray_params=ray_params), config=search_space, num_samples=32, resources_per_trial=ray_params.get_tune_resources()) taken = time.time() - start result = { "time_taken": taken, "trial_states": dict( Counter([trial.status for trial in analysis.trials])) } test_output_json = os.environ.get("TEST_OUTPUT_JSON", "/tmp/tune_32x4.json") with open(test_output_json, "wt") as f: json.dump(result, f) print("PASSED.")
TNBESTrainer = PPOESTrainer.with_updates(
    name="TNBES",
    validate_config=validate_config,
    make_policy_optimizer=make_policy_optimizer_tnbes,
    default_config=tnbes_config,
    default_policy=TNBESPolicy)

if __name__ == '__main__':
    # Test codes
    initialize_ray(test_mode=True, local_mode=False)
    env_name = "CartPole-v0"
    num_agents = 3
    config = {
        "num_sgd_iter": 2,
        "train_batch_size": 400,
        "env": MultiAgentEnvWrapper,
        "env_config": {
            "env_name": env_name,
            "num_agents": num_agents
        },
        "update_steps": 1000,
        "use_tnb_plus": tune.grid_search([True, False]),
        "novelty_type": tune.grid_search(["mse", "kl"]),
        "use_novelty_value_network": True
    }
    tune.run(
        TNBESTrainer,
        name="DELETEME_TEST",
        verbose=2,
        stop={"timesteps_total": 10000},
        config=config)
"convert_to_discrete_action_space": True, "wrap_for_bandits": True, }, } # Actual training_iterations will be 10 * timesteps_per_iteration # (100 by default) = 2,000 training_iterations = 10 print("Running training for %s time steps" % training_iterations) start_time = time.time() analysis = tune.run( "BanditLinUCB", config=config, stop={"training_iteration": training_iterations}, num_samples=1, checkpoint_at_end=False, ) print("The trials took", time.time() - start_time, "seconds\n") # Analyze cumulative regrets of the trials frame = pd.DataFrame() for key, df in analysis.trial_dataframes.items(): frame = frame.append(df, ignore_index=True) x = frame.groupby( "agent_timesteps_total")["episode_reward_mean"].aggregate( ["mean", "max", "min", "std"]) plt.plot(x["mean"])
if __name__ == '__main__':
    ray.init(local_mode=False, log_to_driver=False)
    cpu = 7
    trials = list(_iter())
    n_trials = len(trials) - 1
    print(f"Total n of trials: {n_trials}")
    start_from = 0  # 7 stopping_car
    name_group = NameGroup()
    for i, (problem, method, other_config) in enumerate(trials):
        if i < start_from:
            continue
        print(f"Starting trial: {i}/{n_trials}")
        analysis = tune.run(
            run_parameterised_experiment,
            name="experiment_collection_cartpole_iterations",
            config={
                "main_params": (problem, method, other_config),
                "n_workers": cpu
            },
            resources_per_trial={"cpu": 1},
            stop={"time_since_restore": 500},
            trial_name_creator=name_group.trial_str_creator,
            # resume="PROMPT",
            verbose=0,
            log_to_file=True)
        # df = analysis.results_df
        # df.to_json(os.path.join(analysis.best_logdir, "experiment_results.json"))
        print(f"Finished trial: {i}/{n_trials}")
def main(args):
    # ====================================
    # init env config
    # ====================================
    if args.no_debug:
        ray.init(webui_host="127.0.0.1")
    else:
        ray.init(local_mode=True, webui_host="127.0.0.1")

    # use ray cluster for training
    # ray.init(
    #     address="auto" if args.address is None else args.address,
    #     redis_password="******",
    # )
    #
    # print(
    #     "--------------- Ray startup ------------\n{}".format(
    #         ray.state.cluster_resources()
    #     )
    # )

    agent_specs = {"AGENT-007": agent_spec}

    env_config = {
        "seed": 42,
        "scenarios": [scenario_paths],
        "headless": args.headless,
        "agent_specs": agent_specs,
    }

    # ====================================
    # init tune config
    # ====================================
    class MultiEnv(RLlibHiWayEnv):
        def __init__(self, env_config):
            env_config["scenarios"] = [
                scenario_paths[(env_config.worker_index - 1) % len(scenario_paths)]
            ]
            super(MultiEnv, self).__init__(config=env_config)

    ModelCatalog.register_custom_model("my_fc", FullyConnectedNetwork)

    tune_config = {
        "env": MultiEnv,
        "env_config": env_config,
        "multiagent": {
            "policies": {
                "default_policy": (
                    None,
                    OBSERVATION_SPACE,
                    ACTION_SPACE,
                    {},
                )
            },
            "policy_mapping_fn": lambda agent_id: "default_policy",
        },
        "model": {
            "custom_model": "my_fc",
        },
        "framework": "torch",
        "callbacks": {
            "on_episode_start": on_episode_start,
            "on_episode_step": on_episode_step,
            "on_episode_end": on_episode_end,
        },
        "lr": 1e-4,
        "log_level": "WARN",
        "num_workers": args.num_workers,
        "horizon": args.horizon,
        "train_batch_size": 10240 * 3,
        # "observation_filter": "MeanStdFilter",
        # "batch_mode": "complete_episodes",
        # "grad_clip": 0.5,
        # "model": {
        #     "use_lstm": True,
        # },
    }

    tune_config.update({
        "lambda": 0.95,
        "clip_param": 0.2,
        "num_sgd_iter": 10,
        "sgd_minibatch_size": 1024,
        "gamma": 0.995,
        # "l2_coeff": 5e-4,
    })

    # ====================================
    # init log and checkpoint dir_info
    # ====================================
    experiment_name = EXPERIMENT_NAME.format(
        scenario="multi_scenarios",
        algorithm="PPO",
        n_agent=1,
    )

    log_dir = Path(args.log_dir).expanduser().absolute() / RUN_NAME
    log_dir.mkdir(parents=True, exist_ok=True)
    print(f"Checkpointing at {log_dir}")

    if args.restore:
        restore_path = Path(args.restore).expanduser()
        print(f"Loading model from {restore_path}")
    else:
        restore_path = None

    # run experiments
    analysis = tune.run(
        PPOTrainer,  # "PPO",
        name=experiment_name,
        stop={"time_total_s": 24 * 60 * 60},
        checkpoint_freq=2,
        checkpoint_at_end=True,
        local_dir=str(log_dir),
        resume=args.resume,
        restore=restore_path,
        max_failures=1000,
        export_formats=["model", "checkpoint"],
        config=tune_config,
    )

    print(analysis.dataframe().head())
    })
# __pbt_end__


# __tune_begin__
class Stopper:
    def __init__(self):
        self.should_stop = False

    def stop(self, trial_id, result):
        max_iter = 5 if args.smoke_test else 100
        if not self.should_stop and result["mean_accuracy"] > 0.96:
            self.should_stop = True
        return self.should_stop or result["training_iteration"] >= max_iter


stopper = Stopper()

analysis = tune.run(
    PytorchTrainble,
    name="pbt_test",
    scheduler=scheduler,
    reuse_actors=True,
    verbose=1,
    stop=stopper.stop,
    export_formats=[ExportFormat.MODEL],
    num_samples=4,
    config={
        "lr": tune.uniform(0.001, 1),
        "momentum": tune.uniform(0.001, 1),
    })
# __tune_end__
    # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
    "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
    "framework": args.framework,
}

stop = {
    "timesteps_total": args.stop_timesteps,
    "training_iteration": args.stop_iters,
}

# Train the "main" policy to play really well using self-play.
results = None
if not args.from_checkpoint:
    results = tune.run(
        "PPO",
        config=config,
        stop=stop,
        checkpoint_at_end=True,
        checkpoint_freq=10,
        verbose=1)

# Restore trained trainer (set to non-explore behavior) and play against
# human on command line.
if args.num_episodes_human_play > 0:
    num_episodes = 0
    trainer = PPOTrainer(config=dict(config, **{"explore": False}))
    if args.from_checkpoint:
        trainer.restore(args.from_checkpoint)
    else:
        checkpoint = results.get_last_checkpoint()
        if not checkpoint:
            raise ValueError("No last checkpoint found in results!")
        trainer.restore(checkpoint)
print("Restoring from checkpoint path", checkpoint_path) trainer.restore(checkpoint_path) # Serving and training loop. ts = 0 for _ in range(args.stop_iters): results = trainer.train() print(pretty_print(results)) checkpoint = trainer.save() print("Last checkpoint", checkpoint) with open(checkpoint_path, "w") as f: f.write(checkpoint) if (results["episode_reward_mean"] >= args.stop_reward or ts >= args.stop_timesteps): break ts += results["timesteps_total"] # Run with Tune for auto env and trainer creation and TensorBoard. else: stop = { "training_iteration": args.stop_iters, "timesteps_total": args.stop_timesteps, "episode_reward_mean": args.stop_reward, } tune.run(args.run, config=config, stop=stop, verbose=2, restore=checkpoint_path)
def main(args):
    sumo_scenario = SumoScenario(
        scenario_root=os.path.abspath(args.scenario),
        random_social_vehicle_count=args.num_social_vehicles)

    tune_config = {
        'env': CompetitionEnv,
        'log_level': 'WARN',
        'num_workers': 2,
        'horizon': 5000,
        'env_config': {
            'seed': tune.randint(1000),
            'sumo_scenario': sumo_scenario,
            'headless': args.headless,
            'observation_space': OBSERVATION_SPACE,
            'action_space': ACTION_SPACE,
            'reward_function': tune.function(reward),
            'observation_function': tune.function(observation),
            'action_function': tune.function(action),
        },
        'model': {
            'custom_model': MODEL_NAME,
        },
        "callbacks": {
            "on_episode_start": on_episode_start,
            "on_episode_step": on_episode_step,
            "on_episode_end": on_episode_end
        }
    }

    experiment_name = 'rllib_example'
    log_dir = os.path.expanduser("~/ray_results")
    print(f"Checkpointing at {log_dir}")

    analysis = tune.run(
        'PPO',
        name=experiment_name,
        stop={'time_total_s': 60 * 60},  # 1 hour
        checkpoint_freq=1,
        checkpoint_at_end=True,
        local_dir=log_dir,
        resume=args.resume_training,
        max_failures=10,
        num_samples=args.num_samples,
        export_formats=['model', 'checkpoint'],
        config=tune_config,
    )

    print(analysis.dataframe().head())

    logdir = analysis.get_best_logdir('episode_reward_max')
    model_path = os.path.join(logdir, 'model')
    dest_model_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "model")

    if not os.path.exists(dest_model_path):
        shutil.copytree(model_path, dest_model_path)
        print(f"wrote model to: {dest_model_path}")
    else:
        print(f"Model already exists at {dest_model_path}, not overwriting")
        print(f"New model is stored at {model_path}")
    log_probs = action_dist.logp(train_batch[SampleBatch.ACTIONS])
    return -train_batch[SampleBatch.REWARDS].dot(log_probs)


# <class 'ray.rllib.policy.torch_policy_template.MyTorchPolicy'>
MyTorchPolicy = build_policy_class(
    name="MyTorchPolicy",
    framework="torch",
    loss_fn=policy_gradient_loss)


# Create a new Trainer using the Policy defined above.
class MyTrainer(Trainer):
    def get_default_policy_class(self, config):
        return MyTorchPolicy


if __name__ == "__main__":
    args = parser.parse_args()
    ray.init(num_cpus=args.num_cpus or None)
    tune.run(
        MyTrainer,
        stop={"training_iteration": args.stop_iters},
        config={
            "env": "CartPole-v0",
            # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
            "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
            "num_workers": 2,
            "framework": "torch",
        },
    )
}

current_best_params = [
    {
        "width": 1,
        "height": 2,
        "activation": 0  # Activation will be relu
    },
    {
        "width": 4,
        "height": 2,
        "activation": 1  # Activation will be tanh
    }
]

config = {
    "num_samples": 10 if args.smoke_test else 1000,
    "config": {
        "iterations": 100,
    },
    "stop": {
        "timesteps_total": 100
    },
}

algo = HyperOptSearch(
    space,
    metric="mean_loss",
    mode="min",
    points_to_evaluate=current_best_params)
scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
run(easy_objective, search_alg=algo, scheduler=scheduler, **config)
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

tune.run(PPOTrainer, config={"env": "CartPole-v0"})
# "log_level": "INFO" for verbose,
# "framework": "tfe"/"tf2" for eager,
# "framework": "torch" for PyTorch
if __name__ == "__main__": args = parser.parse_args() ModelCatalog.register_custom_model("cc_model", CentralizedCriticModel) tune.run( "PPO", stop={ "timesteps_total": args.stop, "episode_reward_mean": 7.99, }, config={ "env": GlobalObsTwoStepGame, "batch_mode": "complete_episodes", "callbacks": { "on_postprocess_traj": fill_in_actions, }, "num_workers": 0, "multiagent": { "policies": { "pol1": (None, GlobalObsTwoStepGame.observation_space, GlobalObsTwoStepGame.action_space, {}), "pol2": (None, GlobalObsTwoStepGame.observation_space, GlobalObsTwoStepGame.action_space, {}), }, "policy_mapping_fn": lambda x: "pol1" if x == 0 else "pol2", }, "model": { "custom_model": "cc_model", }, })
def train(config: dict, checkpoint_path: str = None):
    ray.init(local_mode=config["ray"]["local_mode"])

    env_config = {
        "effect": config["env"]["effect"],
        "metric": config["env"]["metric"],
        "feature_extractors": config["env"]["feature_extractors"],
        "source": config["env"]["source"],
        "targets": config["env"]["targets"],
        "eval_interval": config["env"]["eval_interval"],
        "render_to_dac": False,
        "standardize_rewards": False,  # NOTE: experimental feature
        "debug": config["env"]["debug"],
    }

    learning_rate = (config["agent"]["learning_rate"]
                     if "learning_rate" in config["agent"].keys() else 3e-3)
    hidden_layers = config["agent"]["hidden_layers"]
    tanh = "tanh"

    common_config = {
        "env": CrossAdaptiveEnv,
        "env_config": env_config,
        "framework": "torch",
        "num_cpus_per_worker": config["ray"]["num_cpus_per_worker"],
        "log_level": config["ray"]["log_level"],
        "observation_filter": "MeanStdFilter",
        "num_workers": 0,
        "train_batch_size": 256,
    }

    def sac_trainer():
        agent_name = "SAC"
        sac_config = {
            **sac.DEFAULT_CONFIG.copy(),
            **common_config.copy(),
            "learning_starts": 10000 if not checkpoint_path else 0,
            "target_entropy": -24,  # set empirically after trials with dist_lpf
            "optimization": {
                "actor_learning_rate": learning_rate,
                "critic_learning_rate": learning_rate,
                "entropy_learning_rate": learning_rate,
            },
            # Model options for the Q network(s).
            "Q_model": {
                "fcnet_activation": tanh,
                "fcnet_hiddens": hidden_layers,
            },
            # Model options for the policy function.
            "policy_model": {
                "fcnet_activation": tanh,
                "fcnet_hiddens": hidden_layers,
            },
        }
        return sac.SACTrainer, sac_config, agent_name

    def ppo_trainer():
        agent_name = "PPO"
        ppo_config = {
            **ppo.DEFAULT_CONFIG.copy(),
            **common_config.copy(),
            "lr": learning_rate,
            "model": {
                "fcnet_hiddens": hidden_layers,
                "fcnet_activation": tanh,
            },
            "sgd_minibatch_size": 64,
            # Coefficient of the entropy regularizer. Unused if a schedule is set.
            "entropy_coeff": 0.0,
            # Decay schedule for the entropy regularizer.
            "entropy_coeff_schedule": None,
        }
        return ppo.PPOTrainer, ppo_config, agent_name

    agent = config["agent"]["agent"]
    available_trainers = ["sac", "ppo"]
    no_agent_error = ValueError(f"{agent} not available")
    if agent not in available_trainers:
        raise no_agent_error
    elif agent == "sac":
        trainer, agent_config, agent_name = sac_trainer()
    elif agent == "ppo":
        trainer, agent_config, agent_name = ppo_trainer()

    # ###############
    # # Hyperparameter search
    # entropy_coeffs = [0.01 * i for i in range(4)]
    # agent_config = tune.grid_search(
    #     [
    #         {
    #             **agent_config.copy(),
    #             "entropy_coeff": entropy_coeff,
    #         }
    #         for entropy_coeff in entropy_coeffs
    #     ]
    # )
    # ###############

    if checkpoint_path:
        # NOTE: hacky way to find the corresponding Tune 'name' of the
        # restored experiment, since the checkpoint is always three levels deeper
        path = Path(checkpoint_path)
        name = path.parent.parent.parent.name
    else:
        name = f'{config["label"]}_{agent_name}_{timestamp(millis=False)}'

    progress_reporter = CLIReporter(max_report_frequency=30)

    analysis = tune.run(
        trainer,
        config=agent_config,
        local_dir=RAY_RESULTS_DIR,
        checkpoint_at_end=config["agent"]["checkpoint_at_end"],
        checkpoint_freq=config["agent"]["checkpoint_freq"],
        name=name,
        restore=checkpoint_path,  # None is default
        progress_reporter=progress_reporter,
        stop={"training_iteration": 1000},
    )
    print(analysis)
def main(experiment_name, experiment_description, train_file_name, eval_file_name,
         eval_label_file_name, learning_rates, weight_decays, experiment_number=0,
         device='cuda', max_gpus=1, process_per_gpu=1):
    file_directory = os.path.dirname(os.path.abspath(__file__))
    parent_folder = os.path.dirname(file_directory)
    base_save_path = os.path.join(file_directory, 'temp/')

    # Create temp folder if it does not exist
    create_temp_folder(file_directory)

    experiment_description = experiment_description
    train_file_name = train_file_name
    eval_file_name = eval_file_name
    eval_label_file_name = eval_label_file_name

    base_file_path = os.path.join(
        parent_folder, 'original_data/Datasets/BCICompetitionIV/Data/BCICIV_2a_gdf/')
    base_label_path = os.path.join(parent_folder, 'savecopywithlabels/')

    low_pass = 7
    high_pass = 30
    raw_train_file_name = os.path.join(base_file_path, train_file_name)
    raw_eval_file_name = os.path.join(base_file_path, eval_file_name)
    extract_features = False
    scale_1 = [7, 15, 0.5]
    scale_2 = [16, 30, 0.5]
    split_ratio = 0.7
    splitting_strategy = 'balanced-copy'
    batch_size = 32
    shuffle = True
    workers = 1
    max_epochs = 300
    model_channels = 8
    model_classes = 2

    ## DATA PREP START
    # Load raw files into the temp folder
    load_eeg_from_gdf(low_pass, high_pass, raw_train_file_name,
                      f'{base_save_path}raw_train', frequency=250, trial_duration=6)
    load_eeg_from_gdf(low_pass, high_pass, raw_eval_file_name,
                      f'{base_save_path}raw_eval', frequency=250, trial_duration=6)

    # Move and prepare labels into the temp folder
    shutil.copyfile(f'{base_label_path}{eval_label_file_name}',
                    f'{base_save_path}raw_eval_labels.npy')

    # Apply CSP in the temp folder
    apply_csp(f'{base_save_path}raw_train_{low_pass}_{high_pass}.npy',
              f'{base_save_path}raw_train_labels.npy',
              f'{base_save_path}raw_eval_{low_pass}_{high_pass}.npy',
              f'{base_save_path}raw_eval_labels.npy',
              f'{base_save_path}csp_train',
              f'{base_save_path}csp_eval',
              low_pass, high_pass)

    # For each of the datasets
    for i in range(1, 5):
        # Apply normalization without extraction in the temp folder
        apply_normlized_feature_extraction(
            f'{base_save_path}csp_train_class{i}.npy',
            f'{base_save_path}raw_train_labels.npy',
            f'{base_save_path}normalized_train',
            f'{base_save_path}csp_eval_class{i}.npy',
            f'{base_save_path}raw_eval_labels.npy',
            f'{base_save_path}normalized_eval',
            i,
            extract=extract_features)

        # Prepare train and validation sets
        data_splitter = DataSplitter(
            f'{base_save_path}normalized_train_class{i}.npy',
            f'{base_save_path}normalized_train_class{i}_labels.npy',
            f'{base_save_path}_class{i}',
            split_ratio)
        data_splitter.split(splitting_strategy)
    ## DATA PREP END

    ## Threaded Model Start
    max_gpus = int(max_gpus)
    process_per_gpu = int(process_per_gpu)
    device = 'cuda'

    ray_config = {
        'base_save_path': base_save_path,
        'batch_size': batch_size,
        'device': device,
        'eval_file_name': eval_file_name,
        'experiment_description': experiment_description,
        'experiment_name': experiment_name,
        'extract_features': extract_features,
        'file_directory': file_directory,
        'high_pass': high_pass,
        'learning_rate': tune.grid_search(learning_rates),
        'low_pass': low_pass,
        'max_epochs': max_epochs,
        'model_channels': model_channels,
        'model_classes': model_classes,
        'scale_1': scale_1,
        'scale_2': scale_2,
        'shuffle': shuffle,
        'split_ratio': split_ratio,
        'splitting_strategy': splitting_strategy,
        'train_file_name': train_file_name,
        'weight_decay': tune.grid_search(weight_decays),
        'workers': workers
    }

    analysis = tune.run(run_threaded_model,
                        config=ray_config,
                        resources_per_trial={"gpu": 1 / process_per_gpu},
                        num_samples=1,
                        mode='max',
                        max_concurrent_trials=max_gpus * process_per_gpu)

    best_conf = analysis.get_best_config(metric="mean_accuracy", mode="max")
    print("Best config: ", best_conf)

    # Delete the temp folder
    delete_temp_folder(file_directory)

    return best_conf
debug_sparse=True, dataset_name="CIFAR10", # input_size=(3,32,32), # 784, stats_mean=(0.4914, 0.4822, 0.4465), stats_std=(0.2023, 0.1994, 0.2010), data_dir="~/nta/datasets", device="cpu", optim_alg="SGD", ) # run ray.init() tune.run( Trainable, name="SET_local_test", num_samples=1, local_dir=os.path.expanduser("~/nta/results"), config=config, checkpoint_freq=0, checkpoint_at_end=False, stop={"training_iteration": 10}, resources_per_trial={ "cpu": 1, "gpu": 0 }, ) """" ongoing notes """
        if iteration == 0 or 5 <= iteration <= 19:
            # Either the optimizer doesn't exist yet, or we're changing the loss
            # function and the adaptive state is invalidated. I don't think the
            # original paper contained this logic, never resetting the Adam
            # optimizer while performing "warmup" on the regularization, but it
            # seems like the right thing to do.
            self.optimizer = torch.optim.Adam(self._get_parameters(), lr=lr)
        else:
            for param_group in self.optimizer.param_groups:
                param_group["lr"] = lr

        return super().run_epoch(iteration)


if __name__ == "__main__":
    ray.init()
    tune.run(
        experiments.as_ray_trainable(VDropExperiment),
        name=os.path.basename(__file__).replace(".py", ""),
        num_samples=1,
        checkpoint_freq=0,
        checkpoint_at_end=False,
        resources_per_trial={
            "cpu": 1,
            "gpu": (1 if torch.cuda.is_available() else 0)
        },
        loggers=DEFAULT_LOGGERS,
        verbose=1,
    )
                (cmt_anomaly - tmp_anomaly), axis=0)
            precision_anomaly[np.isnan(precision_anomaly)] = 0
            recall_anomaly[np.isnan(recall_anomaly)] = 0
            epoch_acc_anomaly = 2 * np.mean(recall_anomaly) * np.mean(
                precision_anomaly) / (np.mean(precision_anomaly) +
                                      np.mean(recall_anomaly))
            epoch_acc_anatomy = 2 * np.mean(recall_anatomy) * np.mean(
                precision_anatomy) / (np.mean(precision_anatomy) +
                                      np.mean(recall_anatomy))
            # running_corrects_anomaly * batch_size / len(dataloaders[phase].dataset)

            # We save the latent variable and reconstruction for later use;
            # we will need them on the CPU to plot
            # x = x.to("cpu")
            # x_hat = x_hat.to("cpu")
            # z = z.detach().to("cpu").numpy()

            val_acc_anatomy_history.append(epoch_acc_anatomy)
            val_acc_anomaly_history.append(epoch_acc_anomaly)
            valid_loss.append(np.mean(batch_loss_val))
            valid_kl.append(np.mean(batch_kl_val))

            tune.track.log(mean_acc=(epoch_acc_anatomy + epoch_acc_anomaly) / 2)

# --------------------------------------------------------------------------------------------------
analysis = tune.run(
    train_model,
    config={"alpha": tune.grid_search([1, 5, 10, 20, 50, 100])},
    resources_per_trial={'gpu': 2})
def main(num_samples: int, num_workers: int):
    pbt = PopulationBasedTraining(
        time_attr="time_total_s",
        perturbation_interval=50,
        resample_probability=0.25,
        hyperparam_mutations={
            "lambda": tune.uniform(0.9, 1.0),
            "clip_param": tune.uniform(0.01, 0.5),
            "lr": [1e-2, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "num_sgd_iter": tune.randint(1, 30),
            "sgd_minibatch_size": tune.randint(128, 16384),
            "train_batch_size": tune.randint(2000, 160000),
        })

    analysis = tune.run(
        "PPO",
        name="pbt_portfolio_reallocation",
        scheduler=pbt,
        num_samples=num_samples,
        metric="episode_reward_min",
        mode="max",
        config={
            "env": "TradingEnv",
            "env_config": {
                "total_steps": 1000,
                "num_assets": 4,
                "commission": 1e-3,
                "time_cost": 0,
                "window_size": tune.randint(5, 50),
                "min_periods": 150
            },
            "kl_coeff": 1.0,
            "num_workers": num_workers,
            "num_gpus": 0,
            "observation_filter": tune.choice(["NoFilter", "MeanStdFilter"]),
            "framework": "torch",
            "model": {
                "custom_model": "reallocate",
                "custom_model_config": {"num_assets": 4},
                "custom_action_dist": "dirichlet",
            },
            "num_sgd_iter": 10,
            "sgd_minibatch_size": 128,
            "lambda": tune.uniform(0.9, 1.0),
            "clip_param": tune.uniform(0.1, 0.5),
            "lr": tune.loguniform(1e-2, 1e-5),
            "train_batch_size": tune.randint(1000, 20000)
        },
        stop={
            "episode_reward_min": 20,
            "training_iteration": 100
        },
        checkpoint_at_end=True,
        local_dir="./results")

    checkpoints = analysis.get_trial_checkpoints_paths(
        trial=analysis.get_best_trial(metric="episode_reward_min", mode="max"),
        metric="episode_reward_mean")

    params = {"config": analysis.best_config, "checkpoints": checkpoints}
    json.dump(params, open("data/tuned_params.json", "w"), indent=4)
# Optional: Pass the parameter space yourself
# config_space = CS.ConfigurationSpace()
# config_space.add_hyperparameter(
#     CS.UniformFloatHyperparameter("width", lower=0, upper=20))
# config_space.add_hyperparameter(
#     CS.UniformFloatHyperparameter("height", lower=-100, upper=100))
# config_space.add_hyperparameter(
#     CS.CategoricalHyperparameter(
#         "activation", choices=["relu", "tanh"]))

bohb_hyperband = HyperBandForBOHB(
    time_attr="training_iteration",
    max_t=100,
    reduction_factor=4,
    stop_last_trials=False)

bohb_search = TuneBOHB(
    # space=config_space,  # If you want to set the space manually
    max_concurrent=4)

analysis = tune.run(
    MyTrainableClass,
    name="bohb_test",
    config=config,
    scheduler=bohb_hyperband,
    search_alg=bohb_search,
    num_samples=10,
    stop={"training_iteration": 100},
    metric="episode_reward_mean",
    mode="max")

print("Best hyperparameters found were: ", analysis.best_config)
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.a3c.a3c import (DEFAULT_CONFIG, A3CTrainer as trainer)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', help='Gym env name.')
    args = parser.parse_args()

    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1,
        "use_gae": False
    }
    config.update(config_update)

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)

    ray.init()
    tune.run(trainer, stop={"timesteps_total": 2000000}, config=config)
    # ----- Additional Validation -----
    test_noise=False,
    noise_level=0.1,
    # ----- Debugging -----
    debug_weights=True,
    debug_sparse=True,
)

# ray configurations
experiment_name = "gsc-test"
tune_config = dict(
    name=experiment_name,
    num_samples=1,
    local_dir=os.path.expanduser(os.path.join("~/nta/results", experiment_name)),
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 100},
    resources_per_trial={"cpu": 1, "gpu": 1},
    loggers=DEFAULT_LOGGERS,
    verbose=1,
    config=base_exp_config,
)

# Override when running locally for test
if not torch.cuda.is_available():
    base_exp_config["device"] = "cpu"
    tune_config["resources_per_trial"] = {"cpu": 1}

init_ray()
tune.run(Trainable, **tune_config)
            self.should_stop = True
        return self.should_stop or result["training_iteration"] >= max_iter

    def stop_all(self):
        return self.should_stop


stopper = CustomStopper()

analysis = tune.run(
    train_convnet,
    name="pbt_test",
    scheduler=scheduler,
    metric="mean_accuracy",
    mode="max",
    verbose=1,
    stop=stopper,
    export_formats=[ExportFormat.MODEL],
    checkpoint_score_attr="mean_accuracy",
    keep_checkpoints_num=4,
    num_samples=4,
    config={
        "lr": tune.uniform(0.001, 1),
        "momentum": tune.uniform(0.001, 1),
    })
# __tune_end__

if args.server_address:
    # If using Ray Client, we want to make sure checkpoint access
    # happens on the server. So we wrap `test_best_model` in a Ray task.
    # We have to make sure it gets executed on the same node that
    # ``tune.run`` is called on.
    from ray.tune.utils.util import force_on_current_node
local_dir=os.getenv("HOME") + "/dcase/result/ray_results", stop=TrainStopper(max_ep=200, stop_thres=200), checkpoint_freq=1, keep_checkpoints_num=1, checkpoint_at_end=True, checkpoint_score_attr="acc", resources_per_trial={"gpu": 0, "cpu": 64}, ) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument('-test', action='store_true') # default = false args = parser.parse_args() if args.test: print("====== Test Run =======") from asc import exp_utils c = exp_utils.exp_to_config(exp) t = Trainable(c) t._train() exit() ray.shutdown() ray.init(local_mode=True, webui_host="0.0.0.0") analysis = tune.run( exp, verbose=2, )
# <class 'ray.rllib.agents.trainer_template.MyCustomTrainer'>
MyTrainer = build_trainer(
    name="MyCustomTrainer",
    default_policy=MyTFPolicy,
)

if __name__ == "__main__":
    ray.init()
    args = parser.parse_args()
    ModelCatalog.register_custom_model("eager_model", EagerModel)

    config = {
        "env": "CartPole-v0",
        "num_workers": 0,
        "model": {
            "custom_model": "eager_model"
        },
        "framework": "tfe",
    }
    stop = {
        "timesteps_total": args.stop_timesteps,
        "training_iteration": args.stop_iters,
        "episode_reward_mean": args.stop_reward,
    }

    results = tune.run(MyTrainer, stop=stop, config=config)

    if args.as_test:
        check_learning_achieved(results, args.stop_reward)
    ray.shutdown()