def plot_summaries(
    history_lists: Sequence[Sequence[History]],
    labels: Sequence[str],
    prefix: str = ".",
):
    """
    Plot summaries of several experiments.

    One figure is created for each of the attributes "total_rewards",
    "average_rewards" and "lengths". Every experiment is drawn on the same
    axes of that figure, and the figure is saved as "<attribute>.svg"
    under 'prefix'.

    :param history_lists: the list of experiments, i.e. list of list of histories.
    :param labels: the labels associated to each experiment.
    :param prefix: path prefix where to save the plots.
    :return: the list of created figures, one per attribute.
    """
    assert_(
        len(history_lists) == len(labels),
        "Please provide the correct number of labels.",
    )
    attributes = ["total_rewards", "average_rewards", "lengths"]
    figures = []
    for attribute in attributes:
        f = plt.figure()
        ax = f.add_subplot()
        ax.set_title(attribute)
        for label, history_list in zip(labels, history_lists):
            _plot_history_attribute(history_list, attribute, label, ax=ax)
        figures.append(f)
        # Save through the figure object itself rather than pyplot's implicit
        # "current figure", so the right figure is written even if a helper
        # changes the current figure while plotting.
        f.savefig(os.path.join(prefix, attribute + ".svg"))
    return figures
def get_action(self, current_state: State) -> Any:
    """
    Get a joint action, one entry per sub-policy.

    The policy stored in the context must be a multi-agent policy. Each of
    its sub-policies is queried with the state returned by
    'process_state' for its index.

    :param current_state: the (joint) current state.
    :return: the list of actions, in the same order as the sub-policies.
    """
    joint_policy = self.context.policy
    assert_(
        isinstance(joint_policy, MultiAgentPolicy),
        "Only multi-agent policy is allowed.",
    )
    joint_policy = cast(MultiAgentPolicy, joint_policy)
    actions = []
    for index, sub_policy in enumerate(joint_policy.policies):
        local_state = self.process_state(current_state, index)
        actions.append(sub_policy.get_action(local_state))
    return actions
def action_space(self, value: Optional[gym.spaces.Tuple] = None) -> None:
    """
    Set the action space and propagate it to the sub-policies.

    :param value: a tuple space whose components are assigned, in order, to
        the sub-policies; if None, every sub-policy gets None.
    """
    is_allowed = value is None or isinstance(value, gym.spaces.Tuple)
    assert_(is_allowed, "Only tuple spaces allowed.")
    self._action_space = value
    if value is None:
        per_policy_spaces = [None] * len(self.policies)
    else:
        per_policy_spaces = value.spaces
    for sub_policy, space in zip(self.policies, per_policy_spaces):
        sub_policy.action_space = space
def process_state(self, state: Tuple, model_id: int):
    """
    Extract the component of a joint state that belongs to one model.

    The joint state must have exactly one entry per model.

    :param state: the (joint) state.
    :param model_id: the id (index) of the model.
    :return: the component of the state at position 'model_id'.
    """
    nb_components = len(state)
    nb_models = len(self.models)
    assert_(
        nb_components == nb_models,
        f"Expected {nb_models} observation for each agent, found {nb_components}.",
    )
    return state[model_id]
def history_from_json(o: Dict) -> History:
    """
    Build a history object from its JSON (dictionary) form.

    The dictionary must contain exactly the keys "seed", "is_training" and
    "episodes"; each entry of "episodes" is converted with
    'agent_observations_from_json'.

    :param o: the dictionary to convert.
    :return: the history object.
    """
    only_allowed_keys = {"seed", "is_training", "episodes"}
    assert_(
        set(o) == only_allowed_keys,
        f"Only the following keys are allowed: {only_allowed_keys}.",
    )
    training_flag = o["is_training"]
    seed_value = o["seed"]
    episodes: List[EpisodeAgentObs] = list(
        map(agent_observations_from_json, o["episodes"])
    )
    return History(episodes=episodes, is_training=training_flag, seed=seed_value)
def on_step_end(self, step, agent_observation: AgentObservation, **kwargs) -> None:
    """
    Dispatch the 'step end' event to every sub-model.

    The joint observation is split per model: the state component comes
    from 'process_state', the action and next state are indexed by the
    model position, while reward and done flag are passed unchanged.

    :param step: the step identifier, forwarded as-is to each model.
    :param agent_observation: the tuple (state, action, reward, next state, done).
    :param kwargs: extra keyword arguments (not forwarded to the models).
    """
    state, action, reward, state_p, done = agent_observation
    assert_(
        isinstance(state, (list, tuple)),
        f"Expected a tuple of states, found {type(state)}: {state}",
    )
    for index, sub_model in enumerate(self.models):
        local_observation = (
            self.process_state(state, index),
            action[index],
            reward,
            state_p[index],
            done,
        )
        sub_model.on_step_end(step, local_observation)
def model(self, value: Optional[Model] = None) -> None:
    """
    Set the multi-agent model and propagate its sub-models to the policies.

    :param value: a multi-agent model with as many sub-models as there are
        policies, or None to reset the model of every policy to None.
    """
    assert_(
        value is None or isinstance(value, AbstractMultiAgentModel),
        "Only multi-agent models allowed.",
    )
    value = cast(Optional[AbstractMultiAgentModel], value)
    # The error message is an f-string and is therefore built *before*
    # 'assert_' is called; it reads 'value.models', so the length check must
    # only run when a model is actually provided. Otherwise resetting the
    # model with None would fail with an AttributeError.
    if value is not None:
        assert_(
            len(self.policies) == len(value.models),
            f"Number of policies {len(self.policies)} is different from number of models {len(value.models)}.",
        )
    self._model = value
    models = (
        value.models if value is not None else [None] * len(self.policies)
    )  # type: ignore
    for p, m in zip(self.policies, models):
        p.model = m
def is_session_done(self) -> bool:
    """
    Tell whether the session has reached one of its limits.

    At least one of 'nb_episodes' and 'nb_steps' must be set. The session
    is done as soon as the current episode reaches 'nb_episodes' or the
    current step reaches 'nb_steps' (limits that are None are ignored).

    :return: True if a configured limit has been reached, False otherwise.
    """
    has_episode_limit = self.nb_episodes is not None
    has_step_limit = self.nb_steps is not None
    assert_(
        has_episode_limit or has_step_limit,
        "Please specify either 'nb_episodes' or 'nb_steps'.",
    )
    episodes_exhausted = (
        has_episode_limit and self.current_episode >= self.nb_episodes
    )
    steps_exhausted = has_step_limit and self.current_step >= self.nb_steps
    return episodes_exhausted or steps_exhausted
def decode(self, s: int) -> Sequence[int]:
    """
    Decode a point of the discrete space into the multi-discrete space.

    The point is interpreted as a mixed-radix number whose radices are the
    sizes ('nvec') of the input space: digits are peeled off from the last
    dimension backwards, and what is left is the first component.

    :param s: a point in the discrete (output) space.
    :return: the decoded point, as an array in the multi-discrete space.
    """
    assert_(self.output_space.contains(s), f"{s} is not contained in the output space.")
    radices = self.input_space.nvec
    digits = []
    remaining = s
    # The first dimension is skipped on purpose: its digit is whatever is
    # left once all the other dimensions have been divided out.
    for radix in reversed(radices[1:]):
        remaining, digit = divmod(remaining, radix)
        digits.append(digit)
    digits.append(remaining)
    # Digits were collected last-dimension-first; restore the natural order.
    result = np.array(digits[::-1])
    assert_(
        self.input_space.contains(result),
        f"{result} is not contained in the input space.",
    )
    return result
def encode(self, s: Sequence[int]) -> int:
    """
    Encode a point of the multi-discrete space into the discrete space.

    The point is read as a mixed-radix number whose radices are the sizes
    ('nvec') of the input space, and is accumulated with Horner's scheme.

    :param s: a point in the multi-discrete space (list, tuple or array).
    :return: the encoded version of the point.
    """
    # isinstance (rather than an exact type comparison) also converts
    # subclasses of list/tuple, e.g. named tuples.
    if isinstance(s, (list, tuple)):
        s = np.array(s)
    assert_(self.input_space.contains(s), f"{s} is not contained in the input space.")
    dims = self.input_space.nvec
    result = 0
    # Each component but the last is added and then shifted by the size of
    # the *next* dimension; zip stops at the shorter 'dims[1:]', so the last
    # component is left out of the loop and added afterwards unscaled.
    for component, next_dim in zip(s, dims[1:]):
        result += component
        result *= next_dim
    result += s[-1]
    assert_(
        self.output_space.contains(result),
        f"{result} is not contained in the output space.",
    )
    return result
def run_experiments(
    make_agent: Callable,
    env: gym.Env,
    policy: Policy,
    nb_runs: int = 50,
    nb_episodes: int = 500,
    nb_workers: int = 8,
    seeds: Optional[Sequence[int]] = None,
    callbacks: Sequence[LearningEventListener] = (),
    name_prefix: str = "experiment",
) -> Tuple[List[Agent], List[History]]:
    """
    Run many experiments with multiprocessing.

    A single agent is built with 'make_agent' and one job per seed is
    submitted to a pool of worker processes. If the wait is interrupted
    with a KeyboardInterrupt, only the runs that have already finished are
    collected. In every case the pool is shut down before returning.

    :param make_agent: a callable to make an agent.
    :param env: the environment to use.
    :param policy: the policy.
    :param nb_runs: the number of runs.
    :param nb_episodes: the number of episodes.
    :param nb_workers: the number of workers.
    :param seeds: a list of seeds; if None, the range [0, nb_runs-1] is used.
    :param callbacks: a list callbacks.
    :param name_prefix: the prefix to each experiment.
    :return: a pair (agents, histories), with one entry for each completed run.
    """
    agents = []
    histories = []
    if seeds is None:
        seeds = list(range(0, nb_runs))
    assert_(
        len(seeds) == nb_runs,
        f"The number of seeds {len(seeds)} is different from the number of runs {nb_runs}.",
    )
    agent = make_agent()
    # 'default' keeps the degenerate case of zero runs from failing on max().
    _current_seed_to_str = partial(_seed_to_str, max(seeds, default=0))
    pool = multiprocessing.Pool(processes=nb_workers)
    try:
        results = [
            pool.apply_async(
                _do_job,
                args=(
                    agent,
                    env,
                    policy,
                    seed,
                    nb_episodes,
                    callbacks,
                    name_prefix + "-" + _current_seed_to_str(seed),
                ),
                error_callback=_raise_exception,
            )
            for seed in seeds
        ]
        try:
            for p in results:
                p.wait()
        except KeyboardInterrupt:
            # Deliberate best-effort: stop waiting and keep what is ready.
            pass
        for p in results:
            if p.ready():
                agent, history = p.get()
                agents.append(agent)
                histories.append(history)
    finally:
        # Always release the worker processes: without this the pool (and
        # any job still running after an interruption) outlives the call.
        pool.terminate()
        pool.join()
    return agents, histories
def context(self) -> "Context":
    """
    Get the context.

    The context must have been set beforehand; this is checked with 'assert_'.

    :return: the current context.
    """
    current = self._context
    assert_(current is not None, "Context not set.")
    return cast(Context, current)
def test(self, *args, **kwargs) -> History:
    """
    Test the agent, i.e. play with the training mode disabled.

    All arguments are forwarded to '_play'; the 'is_training' flag is forced
    to False and therefore must not be supplied by the caller.

    :return: whatever '_play' returns (annotated as a History).
    """
    flag_supplied = "is_training" in kwargs
    assert_(not flag_supplied, "Cannot specify the 'is_training' flag.")
    outcome = self._play(*args, is_training=False, **kwargs)  # type: ignore
    return outcome
def model(self) -> Model:
    """
    Get the model.

    The model must have been set beforehand; this is checked with 'assert_'.

    :return: the current model.
    """
    current = self._model
    assert_(current is not None, "Model not set.")
    return cast(Model, current)
def action_space(self) -> gym.spaces.Space:
    """
    Get the action space.

    The action space must have been set beforehand; this is checked with
    'assert_'.

    :return: the current action space.
    """
    # Check the attribute that is actually returned: the guard used to test
    # 'self._model', which did not match its own error message and let an
    # unset action space through whenever a model was present.
    assert_(self._action_space is not None, "Action space is not set.")
    return self._action_space