    @staticmethod
    def _build_mock_config(base_config: PongConfig) -> MagicMock:
        # Build the real config, then wrap it in a MagicMock that reports a 2048 MB GPU memory
        # limit and returns the real config from .build().
        config = base_config.build()

        mock_config = MagicMock()
        mock_config.gpu_memory = 2048
        mock_config.build.return_value = config

        return mock_config

    @staticmethod
    def _build_mock_config(base_config: PongConfig) -> MagicMock:
        # Variant used for the agent examples: a small epsilon-greedy schedule and a tiny replay
        # buffer (10 steps, 2 samples) so short test runs can reach the training step.
        config = base_config.build()
        config['eps'] = EpsilonGreedy(eps_initial=0.5, decay=0.0001, eps_min=0.01, decay_schedule='linear',
                                      actions_pool=list(range(3)))
        config['replay_buffer'] = ContinuousBuffer(buffer_size=10)
        config['replay_buffer_samples'] = 2

        mock_config = MagicMock()
        mock_config.gpu_memory = 2048
        mock_config.build.return_value = config

        return mock_config
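
A minimal sketch of how a helper like this is used in the tests below, assuming it sits on a TestCase whose _sut is the agent class under test and which owns a _tmp_dir temporary directory; the test name and episode counts here are illustrative, not from the original suite.

    def test_example_runs_with_mock_config(self):
        # Illustrative usage only: the tiny replay buffer above lets a 2-episode run exercise sampling.
        mock_config = self._build_mock_config(
            PongConfig(agent_type='dqn', mode='stack', plot_during_training=False,
                       folder=self._tmp_dir.name))

        agent = self._sut.example(mock_config, render=False, max_episode_steps=10, n_episodes=2)

        self.assertIsInstance(agent, self._sut)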
Example #3
class TestPongDiffEnvironment(TestPongStackEnvironment):
    _sut = PongConfig(mode='diff', agent_type='dqn').wrapped_env
    _expected_shape = (84, 84, 1)
    _show = False

    def _plot_obs(self, obs: np.ndarray):
        plt.imshow(obs.squeeze())

        if self._show:
            plt.show()
            time.sleep(0.1)

    def test_dqn_pong_diff_example(self):
        # Arrange
        mock_config = self._build_mock_config(PongConfig(agent_type='dqn', mode='diff', plot_during_training=False,
                                                         folder=self._tmp_dir.name))

        # Act
        # Needs to run for long enough to fill replay buffer
        agent = self._sut.example(mock_config, render=False, max_episode_steps=20, n_episodes=3)

        # Assert
        self.assertFalse(agent.model_architecture.dueling)
        self.assertIsInstance(agent, self._sut)

    def test_pong_example(self):
        # Arrange
        config = PongConfig(agent_type=self._agent_type,
                            plot_during_training=False,
                            folder=self._tmp_dir.name)

        # Act
        agent = self._sut.example(config,
                                  render=False,
                                  max_episode_steps=100,
                                  n_episodes=20)

        # Assert
        self.assertIsInstance(agent, self._sut)
def run_exp(agent_type: str, n_episodes: int = 400, max_episode_steps: int = 10000,
            model_mode: str = 'diff'):
    """Run a repeated Pong training experiment for the given agent type and preprocessing mode, then save it."""
    config = PongConfig(agent_type=agent_type, mode=model_mode)

    exp = AgentExperiment(name=f"{agent_type} {model_mode} Pong",
                          agent_class=ActorCriticAgent,
                          agent_config=config,
                          n_reps=3,
                          n_jobs=3,
                          gpu_memory_per_agent=1024,
                          training_options={"n_episodes": n_episodes,
                                            "verbose": 1,
                                            "max_episode_steps": max_episode_steps})

    exp.run()
    exp.save(fn=f"{ActorCriticAgent.__name__}_{agent_type}_experiment.pkl")
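
A minimal sketch of how run_exp might be driven; the __main__ guard, the choice of agent type, and the loop over preprocessing modes are assumptions for illustration rather than part of the original script.

if __name__ == "__main__":
    # Illustrative driver: repeat the actor-critic Pong experiment with both preprocessing modes.
    for mode in ("diff", "stack"):
        run_exp(agent_type="actor_critic", n_episodes=400, max_episode_steps=10000, model_mode=mode)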

    def test_ac_pong_stack_example(self):
        # Arrange
        mock_config = self._build_mock_config(
            PongConfig(agent_type='actor_critic',
                       mode='stack',
                       plot_during_training=False,
                       folder=self._tmp_dir.name))

        # Act
        # Needs to run for long enough to fill replay buffer
        agent = self._sut.example(mock_config,
                                  render=False,
                                  max_episode_steps=20,
                                  n_episodes=3)

        # Assert
        self.assertIsInstance(agent, self._sut)
Example #8
from rlk.agents.actor_critic.actor_critic import ActorCriticAgent
from rlk.environments.atari.pong.pong_config import PongConfig

if __name__ == "__main__":
    agent = ActorCriticAgent.example(
        config=PongConfig(agent_type='actor_critic', mode='stack'),
        max_episode_steps=20000,
        n_episodes=2000,
        render=True,
        checkpoint_every=0)
    agent.save()
from rlk.agents.q_learning.deep_q_agent import DeepQAgent
from rlk.environments.atari.pong.pong_config import PongConfig

if __name__ == "__main__":
    agent = DeepQAgent.example(config=PongConfig(agent_type='dqn', mode='stack'),
                               max_episode_steps=10000,
                               render=True, update_every=1, checkpoint_every=0)
    agent.save()
Example #10
    @classmethod
    def load(cls, fn: str) -> "DeepQAgent":
        """Load a previously saved agent from the given directory and run its readiness check."""
        new_agent = joblib.load(f"{fn}/agent.joblib")
        new_agent.check_ready()

        return new_agent


if __name__ == "__main__":
    from rlk.environments.atari.pong.pong_config import PongConfig
    from rlk.environments.cart_pole import CartPoleConfig
    from rlk.environments import MountainCarConfig

    # DQNs
    agent_cart_pole = DeepQAgent.example(
        CartPoleConfig(agent_type='dqn', plot_during_training=True),
        render=False)
    agent_mountain_car = DeepQAgent.example(
        MountainCarConfig(agent_type='dqn', plot_during_training=True))
    agent_pong = DeepQAgent.example(
        PongConfig(agent_type='dqn', plot_during_training=True),
        max_episode_steps=10000,
        update_every=5,
        render=False,
        checkpoint_every=10)

    # Dueling DQNs
    dueling_agent_cart_pole = DeepQAgent.example(
        CartPoleConfig(agent_type='dueling_dqn', plot_during_training=True))
    dueling_agent_mountain_car = DeepQAgent.example(
        MountainCarConfig(agent_type='dueling_dqn', plot_during_training=True))
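
A minimal sketch of reloading a saved agent with the load classmethod above; the directory name is a placeholder for whatever an earlier agent.save() call actually wrote, not a path from the original script.

    # Placeholder directory: substitute the folder written by a previous agent.save() call.
    reloaded_agent = DeepQAgent.load("saved_pong_dqn")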
Example #11
class TestPongStackEnvironment(unittest.TestCase):
    _sut = PongConfig(mode='stack', agent_type='dqn').wrapped_env
    _expected_shape = (84, 84, 3)
    _n_steps = 20
    _show = False

    def _plot_obs(self, obs: np.ndarray):
        n_buff = obs.shape[2]
        fig, ax = plt.subplots(ncols=n_buff)
        for i in range(n_buff):
            ax[i].imshow(obs[:, :, i])

        if self._show:
            fig.show()
            time.sleep(0.1)

    def test_reset_returns_expected_obs_shape(self):
        # Act
        obs = self._sut.reset()

        # Assert
        self.assertEqual(self._expected_shape, obs.shape)

    def test_reset_returns_expected_obs_value(self):
        # Act
        obs = self._sut.reset()

        # Assert
        self.assertLess(obs[0, 0, 0], 1)

    def test_step_returns_expected_obs_shape(self):
        # Arrange
        _ = self._sut.reset()

        # Act
        obs, reward, done, _ = self._sut.step(0)

        # Assert
        self.assertEqual(self._expected_shape, obs.shape)

    def test_step_returns_expected_obs_value(self):
        # Arrange
        _ = self._sut.reset()

        # Act
        obs, reward, done, _ = self._sut.step(0)

        # Assert
        self.assertLess(obs[0, 0, 0], 1)

    def test_multiple_steps(self):
        # Arrange
        _ = self._sut.reset()

        # Act
        for _ in range(self._n_steps):
            obs, reward, done, _ = self._sut.step(
                np.random.choice(range(self._sut.action_space.n)))

            # Visual check only (plots are shown when _show is True); nothing is asserted here
            self._plot_obs(obs)
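
The environment tests above run under the standard unittest runner; a minimal sketch, with the verbosity level chosen purely for convenience:

import unittest

if __name__ == "__main__":
    # Runs every TestPong* case defined in this module.
    unittest.main(verbosity=2)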
from rlk.agents.q_learning.deep_q_agent import DeepQAgent
from rlk.environments.atari.pong.pong_config import PongConfig

if __name__ == "__main__":
    agent = DeepQAgent.example(
        config=PongConfig(agent_type='double_dueling_dqn'),
        max_episode_steps=10000,
        render=False,
        update_every=1,
        checkpoint_every=0)
    agent.save()