def play(self,
         callbacks: Union[List[core.AgentCallback], core.AgentCallback, None] = None,
         num_episodes: int = 1,
         max_steps_per_episode: int = 1000,
         play_context: core.PlayContext = None,
         default_plots: bool = None):
    """Play episodes with the current policy and return the play context used.

    Args:
        callbacks: a single callback or a list of callbacks; forwarded unchanged to `_play`.
        num_episodes: number of episodes to play. Only used if `play_context` is None.
        max_steps_per_episode: max steps per episode. Only used if `play_context` is None.
        play_context: play configuration to be used. If given, it takes precedence and
            `num_episodes` / `max_steps_per_episode` are ignored.
        default_plots: forwarded unchanged to `_play` (intended to control whether a set
            of default plot callbacks is added).

    Returns:
        The play context handed to `_play`: either the one passed in or the one created here.
    """
    context = play_context
    if context is None:
        # No explicit configuration supplied: build one from the scalar arguments.
        context = core.PlayContext()
        context.max_steps_per_episode = max_steps_per_episode
        context.num_episodes = num_episodes
    self._play(play_context=context, callbacks=callbacks, default_plots=default_plots)
    return context
def play(self, play_context: core.PlayContext, callbacks: List[core.AgentCallback]):
    """Run a play session by forwarding to `play_implementation` (overridden by the subclass).

    For the duration of the call `play_context` is installed as `self._agent_context.play`
    and `callbacks` replaces `self._callbacks`. Both are restored in the `finally` block,
    so they are reset even if the play raises.

    Args:
        play_context: play configuration to be used. It is reset and validated before use.
        callbacks: list of callbacks called during play.

    Raises:
        AssertionError: if `callbacks` is None, `play_context` is falsy, or a play context
            is already set on the agent context.
    """
    assert callbacks is not None, "callbacks not set"
    assert play_context, "play_context not set"
    assert self._agent_context.play is None, "play_context already set in agent_context"

    play_context._reset()
    play_context._validate()
    self._agent_context.play = play_context
    # Remember the currently installed callbacks and swap in the ones for this play.
    previous_callbacks, self._callbacks = self._callbacks, callbacks
    try:
        monitor._MonitorEnv._register_backend_agent(self)
        self._on_play_begin()
        self.play_implementation(self._agent_context.play)
        self._on_play_end()
    finally:
        # Undo registration and restore prior state regardless of how the play ended.
        monitor._MonitorEnv._register_backend_agent(None)
        self._callbacks = previous_callbacks
        self._agent_context.play = None
def play(self,
         callbacks: Union[List[core.AgentCallback], core.AgentCallback, None] = None,
         num_episodes: int = 1,
         max_steps_per_episode: int = 1000,
         play_context: core.PlayContext = None,
         default_plots: bool = None):
    """Play episodes with the current (trained) policy via the backend agent.

    Args:
        callbacks: a single callback, a list of callbacks or None; normalised with
            `_to_callback_list` before being passed on.
        num_episodes: number of episodes to play. Only used if `play_context` is None.
        max_steps_per_episode: max steps per episode. Only used if `play_context` is None.
        play_context: play configuration to be used. If given, it takes precedence and
            `num_episodes` / `max_steps_per_episode` are ignored.
        default_plots: passed to `_add_plot_callbacks` together with the default plot
            callbacks `plot.Steps()` and `plot.Rewards()`.

    Returns:
        The play context handed to the backend agent: either the one passed in or the
        one created here.

    Raises:
        AssertionError: if the backend agent's context reports no trained policy.
    """
    assert self._backend_agent._agent_context._is_policy_trained, "No trained policy available. Call train() first."
    if play_context is None:
        # No explicit configuration supplied: build one from the scalar arguments.
        play_context = core.PlayContext()
        play_context.max_steps_per_episode = max_steps_per_episode
        play_context.num_episodes = num_episodes
    callback_list = self._to_callback_list(callbacks=callbacks)
    callback_list = self._add_plot_callbacks(
        callback_list,
        default_plots,
        [plot.Steps(), plot.Rewards()],
    )
    self._backend_agent.play(play_context=play_context, callbacks=callback_list)
    return play_context
def test_agentcontext_play(self):
    """Playing with an explicit PlayContext reports play, but not train, to the callback."""
    factory = easyagents.backends.debug.DebugAgentFactory()
    agent = factory.create_agent(PpoAgent, ModelConfig(_env_name))
    recorder = AgentContextTest.PlayCallback()

    play_context = PlayContext()
    play_context.num_episodes = 10
    play_context.max_steps_per_episode = 10

    agent.play(callbacks=[Fast(), recorder], play_context=play_context)

    assert not recorder.train_called
    assert recorder.play_called