Пример #1
0
    def play(self,
             callbacks: Union[List[core.AgentCallback], core.AgentCallback,
                              None] = None,
             num_episodes: int = 1,
             max_steps_per_episode: int = 1000,
             play_context: core.PlayContext = None,
             default_plots: bool = None):
        """Plays episodes by delegating to self._play and returns the play context used.

        Args:
            callbacks: a single callback, a list of callbacks or None; forwarded unchanged to self._play
            num_episodes: number of episodes to play; only used if play_context is None
            max_steps_per_episode: max steps per episode; only used if play_context is None
            play_context: play configuration to be used. If set, it is passed on as is and
                num_episodes / max_steps_per_episode are ignored
            default_plots: forwarded unchanged to self._play (presumably toggles a set of default
                plot callbacks - confirm against _play)

        Returns:
            the play_context that was passed in, or the core.PlayContext created here if none was given
        """
        context = play_context
        if context is None:
            # No explicit configuration given: build one from the scalar arguments.
            context = core.PlayContext()
            context.max_steps_per_episode = max_steps_per_episode
            context.num_episodes = num_episodes
        self._play(play_context=context, callbacks=callbacks, default_plots=default_plots)
        return context
Пример #2
0
    def play(self, play_context: core.PlayContext,
             callbacks: List[core.AgentCallback]):
        """Runs play_implementation (supplied by the subclass) under the given play context.

        For the duration of the call play_context is exposed as self._agent_context.play and
        callbacks replaces self._callbacks. Both changes, as well as the registration of this
        agent with monitor._MonitorEnv, are undone in a finally clause, so they are reverted
        even if the play raises.

            Args:
                play_context: play configuration to be used; it is reset and validated before use
                callbacks: list of callbacks called during play.
        """
        assert callbacks is not None, "callbacks not set"
        assert play_context, "play_context not set"
        assert self._agent_context.play is None, "play_context already set in agent_context"

        # Reset / validate first: if either raises, no agent state has been touched yet.
        play_context._reset()
        play_context._validate()
        self._agent_context.play = play_context
        # Swap in the callbacks for this play, remembering the current ones for the cleanup below.
        previous_callbacks, self._callbacks = self._callbacks, callbacks
        try:
            monitor._MonitorEnv._register_backend_agent(self)
            self._on_play_begin()
            self.play_implementation(self._agent_context.play)
            # Only reached if play_implementation returned without raising.
            self._on_play_end()
        finally:
            # Undo the registration, the callback swap and the context assignment.
            monitor._MonitorEnv._register_backend_agent(None)
            self._callbacks = previous_callbacks
            self._agent_context.play = None
Пример #3
0
    def play(self,
             callbacks: Union[List[core.AgentCallback], core.AgentCallback, None] = None,
             num_episodes: int = 1,
             max_steps_per_episode: int = 1000,
             play_context: core.PlayContext = None,
             default_plots: bool = None):
        """Plays episodes on the backend agent and returns the play context used.

        Requires that the backend agent's context reports a trained policy (checked by an assert).

        Args:
            callbacks: a single callback, a list of callbacks or None; normalized by
                self._to_callback_list before use
            num_episodes: number of episodes to play; only used if play_context is None
            max_steps_per_episode: max steps per episode; only used if play_context is None
            play_context: play configuration to be used. If set, it is passed on as is and
                num_episodes / max_steps_per_episode are ignored
            default_plots: handed to self._add_plot_callbacks together with plot.Steps and
                plot.Rewards (presumably decides whether these default plots are added -
                confirm against _add_plot_callbacks)

        Returns:
            the play_context that was passed in, or the core.PlayContext created here if none was given
        """
        assert self._backend_agent._agent_context._is_policy_trained, "No trained policy available. Call train() first."
        context = play_context
        if context is None:
            # No explicit configuration given: build one from the scalar arguments.
            context = core.PlayContext()
            context.max_steps_per_episode = max_steps_per_episode
            context.num_episodes = num_episodes
        callback_list = self._to_callback_list(callbacks=callbacks)
        callback_list = self._add_plot_callbacks(callback_list,
                                                 default_plots,
                                                 [plot.Steps(), plot.Rewards()])
        self._backend_agent.play(play_context=context, callbacks=callback_list)
        return context
Пример #4
0
 def test_agentcontext_play(self):
     """Playing with an explicit PlayContext must mark the callback as play-called, not train-called."""
     factory = easyagents.backends.debug.DebugAgentFactory()
     agent = factory.create_agent(PpoAgent, ModelConfig(_env_name))
     play_callback = AgentContextTest.PlayCallback()
     play_context = PlayContext()
     play_context.num_episodes = 10
     play_context.max_steps_per_episode = 10
     agent.play(callbacks=[Fast(), play_callback], play_context=play_context)
     assert not play_callback.train_called
     assert play_callback.play_called