def test_globaltrainingstatus(tmpdir):
    """Round-trip a parameter through GlobalTrainingStatus save/load and
    verify unknown categories/keys come back as None."""
    status_path = os.path.join(tmpdir, "test.json")

    # Persist one known value and inspect the raw JSON on disk.
    GlobalTrainingStatus.set_parameter_state("Category1", StatusType.LESSON_NUM, 3)
    GlobalTrainingStatus.save_state(status_path)
    with open(status_path) as fp:
        saved = json.load(fp)
    assert "Category1" in saved
    assert StatusType.LESSON_NUM.value in saved["Category1"]
    assert saved["Category1"][StatusType.LESSON_NUM.value] == 3
    assert "metadata" in saved

    # Reload and confirm the value survives the round trip.
    GlobalTrainingStatus.load_state(status_path)
    restored_val = GlobalTrainingStatus.get_parameter_state(
        "Category1", StatusType.LESSON_NUM
    )
    assert restored_val == 3

    # Test unknown categories and status types (keys)
    unknown_category = GlobalTrainingStatus.get_parameter_state(
        "Category3", StatusType.LESSON_NUM
    )

    class FakeStatusType(Enum):
        NOTAREALKEY = "notarealkey"

    unknown_key = GlobalTrainingStatus.get_parameter_state(
        "Category1", FakeStatusType.NOTAREALKEY
    )
    assert unknown_category is None
    assert unknown_key is None
def log_current_lesson(self, parameter_name: Optional[str] = None) -> None:
    """
    Logs the current lesson number and sampler value of the parameter with name
    parameter_name. If no parameter_name is provided, the values and lesson
    numbers of all parameters will be displayed.
    :param parameter_name: Name of a single environment parameter to log, or
        None to log every managed parameter. An unknown name raises KeyError
        (same as the original lookup).
    """
    # Build the list of parameters to report, then log them through one
    # shared code path (the original duplicated this logic in both branches).
    if parameter_name is not None:
        targets = [(parameter_name, self._dict_settings[parameter_name])]
    else:
        targets = list(self._dict_settings.items())
    for name, settings in targets:
        lesson_number = GlobalTrainingStatus.get_parameter_state(
            name, StatusType.LESSON_NUM
        )
        lesson_name = settings.curriculum[lesson_number].name
        lesson_value = settings.curriculum[lesson_number].value
        logger.info(
            f"Parameter '{name}' is in lesson '{lesson_name}' "
            f"and has value '{lesson_value}'."
        )
def __init__(
    self,
    settings: Optional[Dict[str, EnvironmentParameterSettings]] = None,
    run_seed: int = -1,
    restore: bool = False,
):
    """
    EnvironmentParameterManager manages all the environment parameters of a training
    session. It determines when parameters should change and gives access to the
    current sampler of each parameter.
    :param settings: A dictionary from environment parameter to
    EnvironmentParameterSettings.
    :param run_seed: When the seed is not provided for an environment parameter,
    this seed will be used instead.
    :param restore: If true, the EnvironmentParameterManager will use the
    GlobalTrainingStatus to try and reload the lesson status of each environment
    parameter.
    """
    if settings is None:
        settings = {}
    self._dict_settings = settings
    # Start every parameter at lesson 0 unless we are restoring and a saved
    # lesson number already exists in the GlobalTrainingStatus.
    for parameter_name in self._dict_settings.keys():
        initial_lesson = GlobalTrainingStatus.get_parameter_state(
            parameter_name, StatusType.LESSON_NUM
        )
        if initial_lesson is None or not restore:
            GlobalTrainingStatus.set_parameter_state(
                parameter_name, StatusType.LESSON_NUM, 0
            )
    # Seed the smoothing dict with 0.0 for every known parameter up front
    # (the original did this with a redundant second loop); defaultdict keeps
    # lookups safe for any key added later.
    self._smoothed_values: Dict[str, float] = defaultdict(
        float, {key: 0.0 for key in self._dict_settings}
    )
    # Update the seeds of the samplers
    self._set_sampler_seeds(run_seed)
def get_checkpoints(behavior_name: str) -> List[Dict[str, Any]]:
    """Return the stored checkpoint list for behavior_name, creating and
    persisting an empty list when none exists yet."""
    stored = GlobalTrainingStatus.get_parameter_state(
        behavior_name, StatusType.CHECKPOINTS
    )
    if stored:
        return stored
    # No (or empty) checkpoint history: register a fresh list so later
    # mutations are reflected in the global training status.
    fresh: List[Dict[str, Any]] = []
    GlobalTrainingStatus.set_parameter_state(
        behavior_name, StatusType.CHECKPOINTS, fresh
    )
    return fresh
def get_current_lesson_number(self) -> Dict[str, int]:
    """
    Creates a dictionary from environment parameter to the current lesson number.
    If not using curriculum, this number is always 0 for that environment parameter.
    """
    return {
        name: GlobalTrainingStatus.get_parameter_state(name, StatusType.LESSON_NUM)
        for name in self._dict_settings
    }
def update_lessons(
    self,
    trainer_steps: Dict[str, int],
    trainer_max_steps: Dict[str, int],
    trainer_reward_buffer: Dict[str, List[float]],
) -> Tuple[bool, bool]:
    """
    Given progress metrics, calculates if at least one environment parameter is
    in a new lesson and if at least one environment parameter requires the env
    to reset.
    :param trainer_steps: A dictionary from behavior_name to the number of
    training steps this behavior's trainer has performed.
    :param trainer_max_steps: A dictionary from behavior_name to the maximum
    number of training steps this behavior's trainer has performed.
    :param trainer_reward_buffer: A dictionary from behavior_name to the list of
    the most recent episode returns for this behavior's trainer.
    :returns: A tuple of two booleans : (True if any lesson has changed, True if
    environment needs to reset)
    """
    any_lesson_changed = False
    needs_env_reset = False
    for name, settings in self._dict_settings.items():
        current_lesson = GlobalTrainingStatus.get_parameter_state(
            name, StatusType.LESSON_NUM
        )
        candidate_lesson = current_lesson + 1
        active = settings.curriculum[current_lesson]
        criteria = active.completion_criteria
        # Skip parameters with no completion criteria or no further lesson.
        if criteria is None or candidate_lesson >= len(settings.curriculum):
            continue
        behavior = criteria.behavior
        if behavior not in trainer_steps:
            continue
        increment, smoothing = criteria.need_increment(
            float(trainer_steps[behavior]) / float(trainer_max_steps[behavior]),
            trainer_reward_buffer[behavior],
            self._smoothed_values[name],
        )
        # Persist the smoothed metric regardless of whether we advance.
        self._smoothed_values[name] = smoothing
        if not increment:
            continue
        GlobalTrainingStatus.set_parameter_state(
            name, StatusType.LESSON_NUM, candidate_lesson
        )
        new_lesson = settings.curriculum[candidate_lesson]
        logger.info(
            f"Parameter '{name}' has been updated to {new_lesson.value}."
            + f" Now in lesson '{new_lesson.name}'"
        )
        any_lesson_changed = True
        if criteria.require_reset:
            needs_env_reset = True
    return any_lesson_changed, needs_env_reset
def get_current_samplers(self) -> Dict[str, ParameterRandomizationSettings]:
    """
    Creates a dictionary from environment parameter name to their corresponding
    ParameterRandomizationSettings. If curriculum is used, the
    ParameterRandomizationSettings corresponds to the sampler of the current lesson.
    """
    return {
        name: settings.curriculum[
            GlobalTrainingStatus.get_parameter_state(name, StatusType.LESSON_NUM)
        ].value
        for name, settings in self._dict_settings.items()
    }
def __init__(
    self,
    trainer,
    brain_name,
    controller,
    reward_buff_cap,
    trainer_settings,
    training,
    artifact_path,
):
    """
    Creates a GhostTrainer.
    :param trainer: The trainer of the policy/policies being trained with self_play
    :param brain_name: The name of the brain associated with trainer config
    :param controller: GhostController that coordinates all ghost trainers and calculates ELO
    :param reward_buff_cap: Max reward history to track in the reward buffer
    :param trainer_settings: The parameters for the trainer.
    :param training: Whether the trainer is set for training.
    :param artifact_path: Path to store artifacts from this trainer.
    """
    super().__init__(
        brain_name, trainer_settings, training, artifact_path, reward_buff_cap
    )

    self.trainer = trainer
    self.controller = controller

    self._internal_trajectory_queues: Dict[str, AgentManagerQueue[Trajectory]] = {}
    self._internal_policy_queues: Dict[str, AgentManagerQueue[Policy]] = {}

    self._team_to_name_to_policy_queue: DefaultDict[
        int, Dict[str, AgentManagerQueue[Policy]]
    ] = defaultdict(dict)

    self._name_to_parsed_behavior_id: Dict[str, BehaviorIdentifiers] = {}

    # assign ghost's stats collection to wrapped trainer's
    self._stats_reporter = self.trainer.stats_reporter
    # Set the logging to print ELO in the console
    self._stats_reporter.add_property(StatsPropertyType.SELF_PLAY, True)

    self_play_parameters = trainer_settings.self_play
    self.window = self_play_parameters.window
    self.play_against_latest_model_ratio = (
        self_play_parameters.play_against_latest_model_ratio
    )
    if (
        self.play_against_latest_model_ratio > 1.0
        or self.play_against_latest_model_ratio < 0.0
    ):
        logger.warning("The play_against_latest_model_ratio is not between 0 and 1.")

    self.steps_between_save = self_play_parameters.save_steps
    self.steps_between_swap = self_play_parameters.swap_steps
    self.steps_to_train_team = self_play_parameters.team_change
    if self.steps_to_train_team > self.get_max_steps:
        logger.warning(
            "The max steps of the GhostTrainer for behavior name {} is less "
            "than team change. This team will not face opposition that has "
            "been trained if the opposition is managed by a different "
            "GhostTrainer as in an asymmetric game.".format(self.brain_name)
        )

    # Counts the number of steps of the ghost policies. Snapshot swapping
    # depends on this counter whereas snapshot saving and team switching depends
    # on the wrapped. This ensures that all teams train for the same number of trainer
    # steps.
    self.ghost_step: int = 0

    # A list of dicts from brain name to a single snapshot for this trainer's policies
    self.policy_snapshots: List[Dict[str, List[float]]] = []

    # A dict from brain name to the current snapshot of this trainer's policies
    self.current_policy_snapshot: Dict[str, List[float]] = {}

    self.snapshot_counter: int = 0

    # wrapped_training_team and learning team need to be separate
    # in the situation where new agents are created destroyed
    # after learning team switches. These agents need to be added
    # to trainers properly.
    # Both start unset; annotated Optional[int] (the original annotated them
    # as plain int while assigning None).
    self._learning_team: Optional[int] = None
    self.wrapped_trainer_team: Optional[int] = None
    self.last_save: int = 0
    self.last_swap: int = 0
    self.last_team_change: int = 0

    # Restore ELO from a previous run when available, otherwise use the
    # configured initial ELO for every snapshot slot plus the learning policy.
    self.initial_elo = GlobalTrainingStatus.get_parameter_state(
        self.brain_name, StatusType.ELO
    )
    if self.initial_elo is None:
        self.initial_elo = self_play_parameters.initial_elo
    self.policy_elos: List[float] = [self.initial_elo] * (
        self.window + 1
    )  # for learning policy
    self.current_opponent: int = 0