def test_result_grid_future_checkpoint(ray_start_2_cpus, to_object):
    trainable_cls = get_trainable_cls("__fake")
    trial = Trial("__fake", stub=True)
    trial.config = {"some_config": 1}
    trial.last_result = {"some_result": 2, "config": trial.config}

    trainable = ray.remote(trainable_cls).remote()
    ray.get(trainable.set_info.remote({"info": 4}))

    if to_object:
        checkpoint_data = trainable.save_to_object.remote()
    else:
        checkpoint_data = trainable.save.remote()

    trial.on_checkpoint(
        _TrackedCheckpoint(checkpoint_data,
                           storage_mode=CheckpointStorage.MEMORY))
    trial.pickled_error_file = None
    trial.error_file = None
    result_grid = ResultGrid(None)

    # Internal result grid conversion
    result = result_grid._trial_to_result(trial)
    assert isinstance(result.checkpoint, Checkpoint)
    assert isinstance(result.metrics, dict)
    assert isinstance(result.config, dict)
    assert result.metrics_dataframe is None
    assert result.config == {"some_config": 1}
    assert result.metrics["config"] == result.config

    # Load checkpoint data (see ray.rllib.algorithms.mock.MockTrainer definition)
    with result.checkpoint.as_directory() as checkpoint_dir:
        with open(os.path.join(checkpoint_dir, "mock_agent.pkl"), "rb") as f:
            info = pickle.load(f)
            assert info["info"] == 4

def get_agent_cls(agent_name):
    """Retrieve agent class from global registry.

    The user must have called `raylab.register_all_agents()` beforehand to
    have access to Raylab's agents.
    """
    return get_trainable_cls(agent_name)

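# Hedged usage sketch (not from the original source). It assumes `raylab` is
# installed and exposes `register_all_agents()`, as the docstring above
# requires; the agent name "TRPO" is only an illustrative placeholder.
import raylab

raylab.register_all_agents()
agent_cls = get_agent_cls("TRPO")
print(agent_cls)  # the registered trainable class for that name
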
def _check_default_resources_override(run_identifier):
    if not isinstance(run_identifier, str):
        # If obscure dtype, assume it is overridden.
        return True
    trainable_cls = get_trainable_cls(run_identifier)
    return hasattr(trainable_cls, "default_resource_request") and (
        trainable_cls.default_resource_request.__code__ !=
        Trainable.default_resource_request.__code__)

def run_rollout(args, parser):
    config = args.config
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint.
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Determine the video output directory.
    use_arg_monitor = False
    try:
        args.video_dir
    except AttributeError:
        print("There is no such attribute: args.video_dir")
        use_arg_monitor = True

    video_dir = None
    if not use_arg_monitor:
        if args.monitor:
            video_dir = os.path.join("./logs", "video")
        elif args.video_dir:
            video_dir = os.path.expanduser(args.video_dir)

    # Do the actual rollout.
    with rollout.RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info) as saver:
        if use_arg_monitor:
            rollout.rollout(agent, args.env, num_steps, num_episodes, saver,
                            args.no_render, args.monitor)
        else:
            rollout.rollout(agent, args.env, num_steps, num_episodes, saver,
                            args.no_render, video_dir)

def _raise_on_durable(trainable_name, sync_to_driver, upload_dir):
    trainable_cls = get_trainable_cls(trainable_name)
    from ray.tune.durable_trainable import DurableTrainable
    if issubclass(trainable_cls, DurableTrainable):
        if sync_to_driver is not False:
            raise ValueError(
                "EXPERIMENTAL: DurableTrainable will automatically sync "
                "results to the provided upload_dir. "
                "Set `sync_to_driver=False` to avoid data inconsistencies.")
        if not upload_dir:
            raise ValueError(
                "EXPERIMENTAL: DurableTrainable will automatically sync "
                "results to the provided upload_dir. "
                "`upload_dir` must be provided.")

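# Illustrative call sketch (not from the original source). Assuming the
# registered name "durable_ppo" (a placeholder) resolves to a DurableTrainable
# subclass, the guard above only passes when results go to cloud storage:
_raise_on_durable("durable_ppo", sync_to_driver=False,
                  upload_dir="s3://my-bucket/experiments/")
# Either sync_to_driver != False or a missing upload_dir raises ValueError.
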
def run_rollout(trainable_type, mission_file, seed):
    # Writes the mission file for minerl
    mission_file_path = write_mission_file_for_seed(mission_file, seed)

    # Instantiate the agent. Note: the IMPALA trainer implementation in
    # Ray uses an AsyncSamplesOptimizer. Under the hood, this starts a
    # LearnerThread which will wait for training samples. This will fail
    # after a timeout, but has no influence on the rollout. See
    # https://github.com/ray-project/ray/blob/708dff6d8f7dd6f7919e06c1845f1fea0cca5b89/rllib/optimizers/aso_learner.py#L66
    config = {
        "env_config": {
            "mission": mission_file_path,
            "is_rollout": True,
            "seed": seed
        },
        "num_workers": 0
    }
    cls = get_trainable_cls(args.run)
    agent = cls(env="Minecraft", config=config)

    # The optimizer is not needed during a rollout
    agent.optimizer.stop()

    # Load state from checkpoint
    agent.restore(f'{checkpoint_path}/{checkpoint_file}')

    # Get a reference to the environment
    env = agent.workers.local_worker().env

    # Let the agent choose actions until the game is over
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        action = agent.compute_action(obs)
        obs, reward, done, info = env.step(action)
        total_reward += reward
    print(f'Total reward using seed {seed}: {total_reward}')

    # This avoids a sigterm trace in the logs, see minerl.env.malmo.Instance
    env.instance.watcher_process.kill()
    env.close()
    agent.stop()

    return env.get_trajectory()

def as_trainable(self) -> Type[Trainable]:
    param_dict = self._param_dict
    base_config = self._config
    trainer_cls = self.__class__

    if isinstance(self._algorithm, str):
        rllib_trainer = get_trainable_cls(self._algorithm)
    else:
        rllib_trainer = self._algorithm

    class AIRRLTrainer(rllib_trainer):
        def __init__(
            self,
            config: Optional[PartialTrainerConfigDict] = None,
            env: Optional[Union[str, EnvType]] = None,
            logger_creator: Optional[Callable[[], Logger]] = None,
            remote_checkpoint_dir: Optional[str] = None,
            sync_function_tpl: Optional[str] = None,
        ):
            resolved_config = merge_dicts(base_config, config)
            param_dict["config"] = resolved_config

            trainer = trainer_cls(**param_dict)
            rllib_config = trainer._get_rllib_config(process_datasets=True)

            super(AIRRLTrainer, self).__init__(
                rllib_config,
                env,
                logger_creator,
                remote_checkpoint_dir,
                sync_function_tpl,
            )

        @classmethod
        def default_resource_request(
            cls, config: PartialTrainerConfigDict
        ) -> Union[Resources, PlacementGroupFactory]:
            resolved_config = merge_dicts(base_config, config)
            param_dict["config"] = resolved_config

            trainer = trainer_cls(**param_dict)
            rllib_config = trainer._get_rllib_config(process_datasets=False)

            return rllib_trainer.default_resource_request(rllib_config)

    AIRRLTrainer.__name__ = f"AIR{rllib_trainer.__name__}"
    return AIRRLTrainer

def train(name, ray_config, debug=False):
    """Trains sam

    Parameters
    ----------
    name: name of yaml file
    ray_config: ray configuration
    debug: whether to test in editor

    Returns
    -------
    """
    ray.init()

    trainer_class = get_trainable_cls(ray_config['run'])
    default_config = trainer_class._default_config.copy()
    config = merge_dicts(default_config, ray_config['config'])

    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    run = ray_config['run']
    model_name = f'{name}_{now}'
    print(f'\33]0;{model_name} - {name}\a', end='', flush=True)

    if debug:
        config['num_workers'] = 0
        config['num_envs_per_worker'] = 1
        # config['train_batch_size'] = 10
        config['env_config']['log_every'] = 2000
        trainer = trainer_class(config=config)
        policy = trainer.get_policy()
        model = policy.model
        print(model)
        for i in range(10):
            res = trainer.train()
            print(pretty_print(res))
    else:
        tune.run(
            run,
            name=model_name,
            # stop=ray_config['stop'],
            local_dir='results',
            config=config,
            checkpoint_at_end=True,
            verbose=2,
            # restore=RESTORE_PATH,
            checkpoint_freq=10)

    ray.shutdown()

def _check_default_resources_override(
        run_identifier: Union[Experiment, str, Type, Callable]) -> bool:
    if isinstance(run_identifier, Experiment):
        run_identifier = run_identifier.run_identifier

    if isinstance(run_identifier, type):
        if not issubclass(run_identifier, Trainable):
            # If obscure dtype, assume it is overridden.
            return True
        trainable_cls = run_identifier
    elif callable(run_identifier):
        trainable_cls = run_identifier
    elif isinstance(run_identifier, str):
        trainable_cls = get_trainable_cls(run_identifier)
    else:
        # Default to True
        return True

    return hasattr(trainable_cls, "default_resource_request") and (
        trainable_cls.default_resource_request.__code__ !=
        Trainable.default_resource_request.__code__)

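# Minimal sketch of what the check above detects (not from the original
# source). A Trainable that overrides `default_resource_request` has a
# different code object than the base class, so the helper returns True.
# Note: the import path of PlacementGroupFactory may differ between Ray
# versions.
from ray.tune import Trainable as _BaseTrainable
from ray.tune.utils.placement_groups import PlacementGroupFactory


class _TwoCpuTrainable(_BaseTrainable):
    @classmethod
    def default_resource_request(cls, config):
        # Reserve a single bundle with two CPUs per trial.
        return PlacementGroupFactory([{"CPU": 2}])


assert _check_default_resources_override(_TwoCpuTrainable)  # overridden
assert not _check_default_resources_override(_BaseTrainable)  # base default
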
def get_trainable_cls(self):
    return get_trainable_cls(self.trainable_name)

all_steps = []
all_dist = []
all_power_total = []
all_cot = []
all_vel = []
for experiment in range(0, len(exp_params)):
    with open(exp_params[experiment], "rb") as f:
        config = pickle.load(f)
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])
    config["create_env_on_driver"] = True
    config['env_config']['hf_smoothness'] = hf_smoothness_eval
    if "no_eager_on_workers" in config:
        del config["no_eager_on_workers"]

    cls = get_trainable_cls('PPO')
    agent = cls(env=config['env'], config=config)
    # Load state from checkpoint.
    agent.restore(exp_checkpoint[experiment])
    if hasattr(agent, "workers") and isinstance(agent.workers, WorkerSet):
        env = agent.workers.local_worker().env

    res_rollout = rollout_episodes(env, agent, num_episodes=100,
                                   num_steps=1000, render=False)

    # Write detailed data to panda file
    for sim_it in range(0, len(res_rollout[0])):
        new_pd_entry = pd.Series({
            "approach":

def run(args, parser):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")
        # Use default config for given agent.
        _, config = get_trainer_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)

    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    if not config.get("evaluation_num_workers"):
        config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1
    config["render_env"] = not args.no_render
    config["record_env"] = args.video_dir

    ray.init(local_mode=args.local_mode)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Determine the video output directory.
    video_dir = None
    # Allow user to specify a video output path.
    if args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    # Do the actual rollout.
    with RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                args.no_render, video_dir)
    agent.stop()

def is_durable_trainable(self):
    # Local import to avoid cyclical dependencies
    from ray.tune.durable_trainable import DurableTrainable
    trainable_cls = get_trainable_cls(self._run_identifier)
    return issubclass(trainable_cls, DurableTrainable)

def durable(trainable: Union[str, Type[Trainable], Callable]):
    """Convert trainable into a durable trainable.

    Durable trainables are used to upload trial results and checkpoints
    to cloud storage, like e.g. AWS S3.

    This function can be used to convert your trainable, i.e. your trainable
    classes, functions, or string identifiers, to a durable trainable.

    To make durable trainables work, you should pass a valid
    :class:`SyncConfig <ray.tune.SyncConfig>` object to `tune.run()`.

    Example:

    .. code-block:: python

        from ray import tune

        analysis = tune.run(
            tune.durable("PPO"),
            config={"env": "CartPole-v0"},
            checkpoint_freq=1,
            sync_config=tune.SyncConfig(
                sync_to_driver=False,
                upload_dir="s3://your-s3-bucket/durable-ppo/",
            ))

    You can also convert your trainable functions:

    .. code-block:: python

        tune.run(
            tune.durable(your_training_fn),
            # ...
        )

    And your class functions:

    .. code-block:: python

        tune.run(
            tune.durable(YourTrainableClass),
            # ...
        )

    Args:
        trainable (str|Type[Trainable]|Callable): Trainable. Can be a string
            identifier, a trainable class, or a trainable function.

    Returns:
        A durable trainable class wrapped around your trainable.
    """
    if isinstance(trainable, str):
        trainable_cls = get_trainable_cls(trainable)
    else:
        trainable_cls = trainable

    if not inspect.isclass(trainable_cls):
        # Function API
        return wrap_function(trainable_cls, durable=True)

    if not issubclass(trainable_cls, Trainable):
        raise ValueError(
            "You can only use `durable()` with valid trainables. The class "
            "you passed does not inherit from `Trainable`. Please make sure "
            f"it does. Got: {type(trainable_cls)}")

    # else: Class API
    class _WrappedDurableTrainable(DurableTrainable, trainable_cls):
        _name = trainable_cls.__name__ if hasattr(trainable_cls, "__name__") \
            else "durable_trainable"

    return _WrappedDurableTrainable

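# Hedged usage sketch (not from the original source): the wrapper keeps the
# original trainable in the MRO, so the returned class is both a
# DurableTrainable and the wrapped algorithm class. Assumes RLlib is
# installed so that "PPO" resolves in the trainable registry.
DurablePPO = durable("PPO")
assert issubclass(DurablePPO, DurableTrainable)
assert issubclass(DurablePPO, get_trainable_cls("PPO"))
assert DurablePPO._name == get_trainable_cls("PPO").__name__
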
def get_trainable_cls(self):
    if self.stub:
        return None
    return get_trainable_cls(self.trainable_name)

def run(args, parser):
    config = {}
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")
    # If no pkl file found, require command line `--config`.
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no config given on command line!")
    # Load the config from pickled.
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)

    if args.use_cpu:
        # When you don't want to run with any gpus.
        config["num_gpus_per_worker"] = 0
        config["num_gpus"] = 0
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    config["num_workers"] = 1

    # # Set num_workers to be at least 2.
    # if "num_workers" in config:
    #     config["num_workers"] = min(2, config["num_workers"])

    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))

    # ADDED
    if args.deterministic_policy:
        evaluation_config["explore"] = False
        config["explore"] = False
    if "env_config" in evaluation_config:
        evaluation_config["env_config"]["num_levels"] = 1
        evaluation_config["env_config"]["use_sequential_levels"] = True
        evaluation_config["env_config"][
            "start_level"] = 0 if args.level_seed is None else args.level_seed
    config["env_config"]["num_levels"] = 1
    config["env_config"]["use_sequential_levels"] = True
    config["env_config"][
        "start_level"] = 0 if args.level_seed is None else args.level_seed
    # END ADDED

    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings.
    config = merge_dicts(config, args.config)

    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint.
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    video_dir = None
    if args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    vis_info = rollout(agent, args.env, num_steps, num_episodes, video_dir,
                       config, level_seed=args.level_seed)
    visualize_info(vis_info, video_dir)

def get_agent_cls(agent_name):
    """Retrieve agent class from global registry."""
    return get_trainable_cls(agent_name)

def self_play_workflow(config):
    """
    Expects in config:
        checkpoint: checkpoint to load from (None if new)
        trainer: trainer to use
        model: model to use in learning
        percentage_equal: float
            The maximal allowed percentage that equal opponents get game
            results. (see binomial test)
        lr_schedule: List of lr
            Learning rates to use. Will use first to last and update each
            time the model gets worse.
        training_rounds: Rounds of training
        evaluation_rounds: Rounds of evaluation

    1. Generate a large batch of self-play games.
    2. Train.
    3. Test the updated bot against the previous version.
    4. If the bot is measurably stronger, switch to this new version.
    5. If the bot is about the same strength, generate more games and train
       again.
    6. If the bot gets significantly weaker, adjust the optimizer settings
       and retrain.
    """
    ##########################################
    # Set config of trainer and evaluators
    ##########################################
    check_dir = 'logs'
    log_file = 'logs/logs.txt'
    if os.path.exists(log_file):
        os.remove(log_file)
    if config.get("evaluation_num_episodes", None) is None:
        config["evaluation_num_episodes"] = 1

    trainer_fn = get_trainable_cls(config["trainer"])
    lr_idx = 0

    def select_policy_train(agent_id):
        if agent_id == "player1":
            return np.random.choice(
                ["learning_white", "previous_white", "random"],
                1,
                p=[.6, .3, .1])[0]
        else:
            return np.random.choice(
                ["learning_black", "previous_black", "random"],
                1,
                p=[.6, .3, .1])[0]

    def select_policy_eval(learning_player, agent_id):
        if learning_player == "player1":
            if agent_id == "player1":
                return "learning_white"
            else:
                return "previous_black"
        else:
            if agent_id == "player2":
                return "learning_black"
            else:
                return "previous_white"

    trainer_config = copy.deepcopy(config)
    # remove self-play parameters
    trainer_config.pop("trainer")
    trainer_config.pop("percentage_equal")
    trainer_config.pop("model")
    trainer_config.pop("training_rounds")
    trainer_config.pop("evaluation_rounds")
    trainer_config.pop("checkpoint", None)
    trainer_config.pop("lr_schedule", None)
    trainer_config.pop("evaluation_interval", None)
    trainer_config["lr"] = config["lr_schedule"][lr_idx]
    trainer_config["multiagent"] = {
        "policies_to_train": ["learning_white", "learning_black"],
        "policies": {
            "random": (PolicyRandom, config["env"].observation_space,
                       config["env"].action_space, {}),
            "learning_white": (None, config["env"].observation_space,
                               config["env"].action_space,
                               {"model": config["model"]}),
            "learning_black": (None, config["env"].observation_space,
                               config["env"].action_space,
                               {"model": config["model"]}),
            "previous_white": (None, config["env"].observation_space,
                               config["env"].action_space,
                               {"model": config["model"]}),
            "previous_black": (None, config["env"].observation_space,
                               config["env"].action_space,
                               {"model": config["model"]}),
        },
        "policy_mapping_fn": select_policy_train,
    }
    trainer_config["train_batch_size"] = 2 * config["train_batch_size"]

    eval_config_player1 = copy.deepcopy(trainer_config)
    eval_config_player1["multiagent"]["policy_mapping_fn"] = partial(
        select_policy_eval, "player1")
    eval_config_player1["multiagent"]["policies_to_train"] = []
    eval_config_player2 = copy.deepcopy(trainer_config)
    eval_config_player2["multiagent"]["policy_mapping_fn"] = partial(
        select_policy_eval, "player2")
    eval_config_player2["multiagent"]["policies_to_train"] = []

    ##########################################
    # Run train / evaluation rounds
    ##########################################
    def update_for_next_loop(total_rounds, rounds, reset=False):
        done = False
        if reset:
            next_num_rounds = rounds.min
        else:
            if total_rounds >= rounds.max:
                done = True
            next_num_rounds = rounds.step
        return done, next_num_rounds

    ray.init()
    trainer = trainer_fn(env=trainer_config["env"], config=trainer_config)
    evaluator_player1 = trainer_fn(env=eval_config_player1["env"],
                                   config=eval_config_player1)
    evaluator_player2 = trainer_fn(env=eval_config_player1["env"],
                                   config=eval_config_player2)

    total_rounds_training = 0
    done, training_rounds = update_for_next_loop(total_rounds_training,
                                                 config["training_rounds"],
                                                 True)
    prev_it_state = config.get("checkpoint", None)
    prev_state = prev_it_state
    while not done:
        ##########################################
        # Train
        ##########################################
        try:
            if prev_it_state is not None:
                trainer.restore(prev_it_state)
            for _ in range(training_rounds):
                trainer.train()
            state = trainer.save(check_dir)
            # trainer.stop()
            total_rounds_training += training_rounds
        except Exception:
            trainer.stop()
            with open(log_file, 'a') as f:
                f.write("Model failed, updating optimizer\n")
            lr_idx += 1
            if lr_idx < len(config["lr_schedule"]):
                trainer_config["lr"] = config["lr_schedule"][lr_idx]
                trainer = trainer_fn(env=trainer_config["env"],
                                     config=trainer_config)
                total_rounds_training = 0
                done, training_rounds = update_for_next_loop(
                    total_rounds_training, config["training_rounds"], True)
                prev_it_state = prev_state
            else:
                done = True
            continue  # try again.

        ##########################################
        # Evaluate
        ##########################################
        try:
            total_eval_rounds = 0
            comparison_wrt_equal = 1
            eval_results1 = []
            eval_results2 = []
            # maximal evaluation rounds determined by training, does not make
            # sense to evaluate more than training rounds.
            eval_info = InfoNumberRounds(
                config["evaluation_rounds"].min,
                min(config["evaluation_rounds"].max, total_rounds_training),
                config["evaluation_rounds"].step)
            done_eval, eval_rounds = update_for_next_loop(
                total_eval_rounds, eval_info, True)
            while not done_eval:
                num_episodes = eval_rounds * config["evaluation_num_episodes"]

                evaluator_player1.restore(state)
                eval_results1.extend(
                    own_evaluation(evaluator_player1, eval_rounds))
                num_pos = sum(x == 1 for x in eval_results1)
                num_neg = sum(x == -1 for x in eval_results1)
                comparison_wrt_equal1 = binom_test(num_pos, num_pos + num_neg,
                                                   0.5)
                with open(log_file, 'a') as f:
                    f.write(
                        f'results1: trained agent wins: {num_pos} previous agent wins: {num_neg} remises: {sum(x == 0 for x in eval_results1)} \n'
                    )
                    f.write(
                        f'chance result for equal opponents: {comparison_wrt_equal1} \n'
                    )

                evaluator_player2.restore(state)
                eval_results2.extend(
                    own_evaluation(evaluator_player2, eval_rounds))
                num_pos = sum(x == 1 for x in eval_results2)
                num_neg = sum(x == -1 for x in eval_results2)
                comparison_wrt_equal2 = binom_test(num_neg, num_pos + num_neg,
                                                   0.5)
                with open(log_file, 'a') as f:
                    f.write(
                        f'results2: trained agent wins: {num_neg} previous agent wins: {num_pos} remises: {sum(x == 0 for x in eval_results2)} \n'
                    )
                    f.write(
                        f'chance result for equal opponents: {comparison_wrt_equal2} \n'
                    )

                total_eval_rounds += eval_rounds
                done_eval, eval_rounds = update_for_next_loop(
                    total_eval_rounds, eval_info)
                if config["percentage_equal"] > comparison_wrt_equal1 or \
                        config["percentage_equal"] > comparison_wrt_equal2:
                    # one of players improved
                    done_eval = True
        except Exception:
            with open(log_file, 'a') as f:
                f.write("Model failed, need to update optimizer\n")
            # trigger update optimizer
            comparison_wrt_equal1 = 0
            comparison_wrt_equal2 = 0
            eval_results1 = [-1]
            eval_results2 = [1]

        ##########################################
        # Update policy
        ##########################################
        if config["percentage_equal"] > comparison_wrt_equal1 or \
                config["percentage_equal"] > comparison_wrt_equal2:
            # results differ enough
            if sum(x == 1 for x in eval_results1) > sum(
                    x == -1 for x in eval_results1) and sum(
                        x == -1 for x in eval_results2) > sum(
                            x == 1 for x in eval_results2):
                with open(log_file, 'a') as f:
                    f.write("Model improved\n")
                total_rounds_training = 0
                done, training_rounds = update_for_next_loop(
                    total_rounds_training, config["training_rounds"], True)

                # reupdate previous
                key_previous_val_learning_white = {}
                for (k, v), (k2, v2) in zip(
                        trainer.get_policy(
                            "previous_white").get_weights().items(),
                        trainer.get_policy(
                            "learning_white").get_weights().items()):
                    key_previous_val_learning_white[k] = v2
                key_previous_val_learning_black = {}
                for (k, v), (k2, v2) in zip(
                        trainer.get_policy(
                            "previous_black").get_weights().items(),
                        trainer.get_policy(
                            "learning_black").get_weights().items()):
                    key_previous_val_learning_black[k] = v2

                # set weights
                trainer.set_weights({
                    "previous_white": key_previous_val_learning_white,
                    "previous_black": key_previous_val_learning_black,
                    # no change
                    "learning_white":
                        trainer.get_policy("learning_white").get_weights(),
                    "learning_black":
                        trainer.get_policy("learning_black").get_weights(),
                })

                if prev_state is not None:
                    trainer.delete_checkpoint(prev_state)
                trainer.delete_checkpoint(state)
                prev_it_state = trainer.save(check_dir)
                prev_state = prev_it_state
            elif sum(x == 1 for x in eval_results1) < sum(
                    x == -1 for x in eval_results1) and sum(
                        x == -1 for x in eval_results2) < sum(
                            x == 1 for x in eval_results2):
                with open(log_file, 'a') as f:
                    f.write("Model got worse, updating optimizer\n")
                trainer.stop()
                lr_idx += 1
                if lr_idx < len(config["lr_schedule"]):
                    trainer_config["lr"] = config["lr_schedule"][lr_idx]
                    trainer = trainer_fn(env=trainer_config["env"],
                                         config=trainer_config)
                    total_rounds_training = 0
                    done, training_rounds = update_for_next_loop(
                        total_rounds_training, config["training_rounds"],
                        True)
                    prev_it_state = prev_state
                else:
                    done = True
            else:
                with open(log_file, 'a') as f:
                    f.write("One player improved one got worse, trying more "
                            "learning iterations.\n")
                done, training_rounds = update_for_next_loop(
                    total_rounds_training, config["training_rounds"])
                prev_it_state = state
        else:
            with open(log_file, 'a') as f:
                f.write("Unable to evaluate, trying more learning "
                        "iterations.\n")
            done, training_rounds = update_for_next_loop(
                total_rounds_training, config["training_rounds"])
            prev_it_state = state

    trainer.restore(prev_it_state)
    trainer.save()
    print("Checkpoint and trainer saved at: ", trainer.logdir)
    with open(log_file, 'a') as f:
        f.write(f'Checkpoint and trainer saved at: {trainer.logdir} \n')

def _check_default_resources_override(run_identifier):
    trainable_cls = get_trainable_cls(run_identifier)
    return hasattr(trainable_cls, "default_resource_request") and (
        trainable_cls.default_resource_request.__code__ !=
        Trainable.default_resource_request.__code__)

def run_rollouts(args, config):
    """Run rollouts (if not bundling existing rollouts)

    :args: Argparse.Args: User defined arguments
    :config: Dict: Execution Configuration
    """
    # Make sure configuration has the correct outpath
    config['callbacks'] = lambda: RLlibIxdrlCallbacks(args=args,
                                                      config=config)

    # Spin up Ray only if it is not already running
    if args.init_ray:
        ray.init(local_mode=args.local_mode)

    # Set up environment
    env = gym.make(args.env_name)

    # Wrap environment
    env = wrap_env(env, args.wrappers)

    # Register environment with Ray
    register_env(args.env_name, lambda config: env)

    # Create the model Trainer from config.
    cls = get_trainable_cls(args.model_name)

    # Instantiate agent
    agent = cls(env=args.env_name,
                config=config,
                logger_creator=cameleon_logger_creator(args.writer_dir))

    # Restore agent if needed
    if args.checkpoint_path:
        # This is not ideal, but only way to guarantee
        # correct information about model. Add slight overhead
        # Need to restore the model for rollouts but, then
        # must restart to feed information to logger
        logging.info(
            "Restoring agent twice to feed information correctly to logger")
        agent.restore(args.checkpoint_path)

        # Make sure configuration has the correct outpath
        args.epochs_trained = (agent._iteration
                               if agent._iteration is not None else 0)

        # Make sure configuration has the correct outpath
        config['callbacks'] = lambda: RLlibIxdrlCallbacks(args=args,
                                                          config=config)

        # Need to run setup again with new callbacks
        agent.setup(config)
        agent.restore(args.checkpoint_path)

    # Do the actual rollout.
    run_rollout(agent,
                env,
                args.env_name,
                args.num_timesteps,
                args.num_episodes,
                args.no_render,
                args.video_dir,
                args=args)

    # Stop the agent
    agent.stop()

    # Get the gross files out of there
    cleanup(config['monitor'],
            args.writer_dir,
            ext=args.ext,
            sync_bundles=args.sync_bundles)

def as_trainable(self) -> Type[Trainable]:
    param_dict = self._param_dict
    base_config = self._config or {}
    trainer_cls = self.__class__
    preprocessor = self.preprocessor

    if isinstance(self._algorithm, str):
        rllib_trainer = get_trainable_cls(self._algorithm)
    else:
        rllib_trainer = self._algorithm

    class AIRRLTrainer(rllib_trainer):
        def __init__(
            self,
            config: Optional[PartialAlgorithmConfigDict] = None,
            env: Optional[Union[str, EnvType]] = None,
            logger_creator: Optional[Callable[[], Logger]] = None,
            remote_checkpoint_dir: Optional[str] = None,
            custom_syncer: Optional[Syncer] = None,
        ):
            resolved_config = merge_dicts(base_config, config or {})
            param_dict["config"] = resolved_config

            trainer = trainer_cls(**param_dict)
            rllib_config = trainer._get_rllib_config(process_datasets=True)

            super(AIRRLTrainer, self).__init__(
                config=rllib_config,
                env=env,
                logger_creator=logger_creator,
                remote_checkpoint_dir=remote_checkpoint_dir,
                custom_syncer=custom_syncer,
            )

        def save_checkpoint(self, checkpoint_dir: str):
            checkpoint_path = super(AIRRLTrainer,
                                    self).save_checkpoint(checkpoint_dir)

            trainer_class_path = os.path.join(checkpoint_dir,
                                              RL_TRAINER_CLASS_FILE)
            with open(trainer_class_path, "wb") as fp:
                cpickle.dump(self.__class__, fp)

            config_path = os.path.join(checkpoint_dir, RL_CONFIG_FILE)
            with open(config_path, "wb") as fp:
                cpickle.dump(self.config, fp)

            if preprocessor:
                save_preprocessor_to_dir(preprocessor, checkpoint_dir)

            return checkpoint_path

        @classmethod
        def default_resource_request(
            cls, config: PartialAlgorithmConfigDict
        ) -> Union[Resources, PlacementGroupFactory]:
            resolved_config = merge_dicts(base_config, config)
            param_dict["config"] = resolved_config

            trainer = trainer_cls(**param_dict)
            rllib_config = trainer._get_rllib_config(process_datasets=False)

            return rllib_trainer.default_resource_request(rllib_config)

    AIRRLTrainer.__name__ = f"AIR{rllib_trainer.__name__}"
    return AIRRLTrainer

def visualizer_rllib(args, seed=None):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    config['callbacks'] = MyCallbacks

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)
    # flow_params['env'].additional_params["use_seeds"] = args.use_seeds
    # print(args.use_seeds)
    seed_tmp = None
    if seed:
        with open(seed, 'rb') as f:
            seed_tmp = pickle.load(f)
        config['seed'] = int(seed_tmp['rllib_seed'])
    elif args.use_seeds:
        with open(args.use_seeds, 'rb') as f:
            seed_tmp = pickle.load(f)
        config['seed'] = int(seed_tmp['rllib_seed'])

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)
    if seed_tmp:
        # setattr(sim_params, 'seed', seed_tmp['sumo_seed'])
        sim_params.seed = int(int(seed_tmp['sumo_seed']) / 10**6)
        print(sim_params.seed)
        # import IPython
        # IPython.embed()

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)

    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))
    config = merge_dicts(config, evaluation_config)

    if args.run:
        agent_cls = get_trainable_cls(args.run)
    elif config_run:
        agent_cls = get_trainable_cls(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. Click '
              'the green Play arrow to continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # if seed is not None:
    #     print(seed)
    #     flow_params["env"].additional_params["use_seeds"] = seed
    #     input()
    # else:
    #     flow_params["env"].additional_params["use_seeds"] = args.use_seeds

    if args.horizon:
        config['horizon'] = args.horizon
        flow_params['env'].horizon = args.horizon

    # Create and register a gym+rllib env
    register_time = time.time()
    create_env, env_name = make_create_env(params=flow_params,
                                           version=0,
                                           seeds_file=seed)
    register_env(env_name, create_env)
    register_time = time.time() - register_time
    print("Register Time:", register_time)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]
    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = True  # False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    create_time = time.time()
    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)
    create_time = time.time() - create_time
    print("Create time:", create_time)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        print(config['multiagent']['policy_mapping_fn'])
        policy_map_fn = config['multiagent']['policy_mapping_fn']  # .func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    restart_time = time.time()
    env.restart_simulation(sim_params=sim_params, render=sim_params.render)
    restart_time = time.time() - restart_time
    print("Restart Time:", restart_time)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []

    if PRINT_TO_SCREEN:
        pp = pprint.PrettyPrinter(indent=2)
        print("config ")
        pp.pprint(config)
        print("flow_params ")
        pp.pprint(flow_params)

    if REALTIME_PLOTS:
        # prepare plots
        # You probably won't need this if you're embedding things in a
        # tkinter plot...
        plt.ion()
        fig = plt.figure()
        axA = fig.add_subplot(331)
        axA.set_title("Actions")
        axR = fig.add_subplot(332)
        axR.set_title("Rewards")
        axS = fig.add_subplot(333)
        axS.set_title("States")
        axS0 = fig.add_subplot(334)
        axS0.set_title("S0")
        axS1 = fig.add_subplot(335)
        axS1.set_title("S1")
        axS2 = fig.add_subplot(336)
        axS2.set_title("S2")
        axA_hist = fig.add_subplot(337)
        axA_hist.set_title("Actions")
        axR_hist = fig.add_subplot(338)
        axR_hist.set_title("Rewards")
        axS_hist = fig.add_subplot(339)
        axS_hist.set_title("States")
        axS.set_ylim((-2, 3))
        axA.set_ylim((-5, 5))
        axR.set_ylim((-1, 1))
        initialized_plot = False

    # record for visualization purposes
    actions = []
    rewards = []
    states = []
    times = []
    WARMUP = args.warmup

    run_time = time.time()
    for i in range(args.num_rollouts):
        vel = []
        time_to_exit = 0
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            time_to_exit += 1
            vehicles = env.unwrapped.k.vehicle
            if np.mean(vehicles.get_speed(vehicles.get_ids())) > 0:
                vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            # vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id],
                                state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if SUMMARY_PLOTS:
                # record for visualization purposes
                actions.append(action)
                rewards.append(reward)
                states.append(state)
            if PRINT_TO_SCREEN:
                print("action")
                pp.pprint(action)
                print("reward")
                pp.pprint(reward)
                print("state")
                pp.pprint(state)
                print("after step ")
            if REALTIME_PLOTS:
                # Update plots.
                if not initialized_plot:  # initialize
                    # plot() returns a tuple of line objects, thus the commas
                    lineA, = axA.plot([0] * len(action), 'g^')
                    lineR, = axR.plot(0, 'bs')
                    lineS, = axS.plot([0] * len(state), 'r+')
                    initialized_plot = True
                lineA.set_ydata(action)
                lineR.set_ydata(reward)
                lineS.set_ydata(state)
                fig.canvas.draw()
                fig.canvas.flush_events()
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break
            if args.use_delay > 0:
                if vehicles.get_num_arrived() >= args.use_delay:
                    break
        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(5000)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(5000)
        final_inflows.append(inflow)
        times.append(time_to_exit)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))
    run_time = time.time() - run_time

    print('==== Summary of results ====')
    print("Run Time: ", run_time)
    print("Return:")
    env.close()
    return_reward = 0
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
            return_reward = np.mean(rew)
    else:
        print(rets)
        print('Average, std: {:.2f}, {:.5f}'.format(np.mean(rets),
                                                    np.std(rets)))
        return_reward = np.mean(rets)

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(mean_speed),
                                                np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(std_speed),
                                                np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_outflows),
                                                np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_inflows),
                                                np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {:.2f}, {:.5f}'.format(
        np.mean(throughput_efficiency), np.std(throughput_efficiency)))
    print("Time Delay")
    print(times)
    print("Time for certain number of vehicles to exit {:.2f},{:.5f}".format(
        (np.mean(times)), np.std(times)))

    if args.output:
        np.savetxt(args.output, [
            return_reward, mean_speed, std_speed, final_inflows,
            final_outflows, times
        ])
    if SUMMARY_PLOTS:
        generateHtmlplots(actions, rewards, states)

    # terminate the environment
    env.unwrapped.terminate()
    env.terminate()

    # if prompted, convert the emission file into a csv file
    # (this must happen before `env` is deleted below, since the emission
    # filename is derived from the environment's network name)
    if args.gen_emission:
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        # convert the emission file into a csv file
        emission_to_csv(emission_path)
        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)
        # delete the .xml version of the emission file
        os.remove(emission_path)

    # Deleting the env in order to remove sumo process
    del env
    del evaluation_config

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)

    return return_reward, mean_speed, final_inflows, final_outflows

def load_agent_config(args):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")
        # Use default config for given agent.
        _, config = get_trainer_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["num_workers"] = 0
    config["num_envs_per_worker"] = 1
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)

    if not args.env:
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    # if not config.get("evaluation_num_workers"):
    #     config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_num_episodes"):
        config["evaluation_num_episodes"] = 1
    config["render_env"] = args.render
    config["record_env"] = args.video_dir

    if config.get("env_config") is None:
        config["env_config"] = {}

    print(args.agent_speeds)
    config["env_config"]["agent_speeds"] = args.agent_speeds

    register_env(args.env, env_creator)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    return agent, config

def run(args, parser):
    # Load configuration from checkpoint file.
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    # Try parent directory.
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")
    # If no pkl file found, require command line `--config`.
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no config given on command line!")
        else:
            config = args.config
    # Load the config from pickled.
    else:
        with open(config_path, "rb") as f:
            config = pickle.load(f)

    # Cap num_workers at 2 for the rollout.
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)

    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)
    # Load state from checkpoint.
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Determine the video output directory.
    # Deprecated way: Use (--out|~/ray_results) + "/monitor" as dir.
    video_dir = None
    if args.monitor:
        video_dir = os.path.join(
            os.path.dirname(args.out or "")
            or os.path.expanduser("~/ray_results/"), "monitor")
    # New way: Allow user to specify a video output path.
    elif args.video_dir:
        video_dir = os.path.expanduser(args.video_dir)

    # Do the actual rollout.
    with RolloutSaver(
            args.out,
            args.use_shelve,
            write_update_file=args.track_progress,
            target_steps=num_steps,
            target_episodes=num_episodes,
            save_info=args.save_info) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                args.no_render, video_dir)
    agent.stop()

def run(args, parser):
    # Load configuration from checkpoint file.
    config_path = ""
    if args.checkpoint:
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.pkl")
        # Try parent directory.
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.pkl")

    # Load the config from pickled.
    if os.path.exists(config_path):
        with open(config_path, "rb") as f:
            config = cloudpickle.load(f)
    # If no pkl file found, require command line `--config`.
    else:
        # If no config in given checkpoint -> Error.
        if args.checkpoint:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory AND no `--config` given on command "
                "line!")
        # Use default config for given agent.
        _, config = get_algorithm_class(args.run, return_config=True)

    # Make sure worker 0 has an Env.
    config["create_env_on_driver"] = True

    # Merge with `evaluation_config` (first try from command line, then from
    # pkl file).
    evaluation_config = copy.deepcopy(
        args.config.get("evaluation_config",
                        config.get("evaluation_config", {})))
    config = merge_dicts(config, evaluation_config)
    # Merge with command line `--config` settings (if not already the same
    # anyways).
    config = merge_dicts(config, args.config)

    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    # Make sure we have evaluation workers.
    if not config.get("evaluation_num_workers"):
        config["evaluation_num_workers"] = config.get("num_workers", 0)
    if not config.get("evaluation_duration"):
        config["evaluation_duration"] = 1
    # Hard-override this as it raises a warning by Trainer otherwise.
    # Makes no sense anyways, to have it set to None as we don't call
    # `Trainer.train()` here.
    config["evaluation_interval"] = 1

    # Rendering and video recording settings.
    if args.no_render:
        deprecation_warning(old="--no-render", new="--render", error=False)
        args.render = False
    config["render_env"] = args.render

    ray.init(local_mode=args.local_mode)

    # Create the Trainer from config.
    cls = get_trainable_cls(args.run)
    agent = cls(env=args.env, config=config)

    # Load state from checkpoint, if provided.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    num_steps = int(args.steps)
    num_episodes = int(args.episodes)

    # Do the actual rollout.
    with RolloutSaver(
        args.out,
        args.use_shelve,
        write_update_file=args.track_progress,
        target_steps=num_steps,
        target_episodes=num_episodes,
        save_info=args.save_info,
    ) as saver:
        rollout(agent, args.env, num_steps, num_episodes, saver,
                not args.render)
    agent.stop()