Example #1
    def test_timesteps(self):
        agent = mock.Mock()
        agent.batch_act.side_effect = [[1, 1]] * 5

        def make_env(idx):
            env = mock.Mock()
            if idx == 0:
                # First episode: 0 -> 1 -> 2 -> 3 (reset)
                # Second episode: 4 -> 5 -> 6 -> 7 (done)
                env.reset.side_effect = [('state', 0), ('state', 4)]
                env.step.side_effect = [
                    (('state', 1), 0, False, {}),
                    (('state', 2), 0.1, False, {}),
                    (('state', 3), 0.2, False, {'needs_reset': True}),
                    (('state', 5), -0.5, False, {}),
                    (('state', 6), 0, False, {}),
                    (('state', 7), 1, True, {}),
                ]
            else:
                # First episode: 0 -> 1 (reset)
                # Second episode: 2 -> 3 (reset)
                # Third episode: 4 -> 5 -> 6 -> 7 (done)
                env.reset.side_effect = [
                    ('state', 0), ('state', 2), ('state', 4)]
                env.step.side_effect = [
                    (('state', 1), 2, False, {'needs_reset': True}),
                    (('state', 3), 3, False, {'needs_reset': True}),
                    (('state', 5), -0.6, False, {}),
                    (('state', 6), 0, False, {}),
                    (('state', 7), 1, True, {}),
                ]
            return env

        vec_env = chainerrl.envs.SerialVectorEnv(
            [make_env(i) for i in range(2)])
        if self.n_episodes:
            with self.assertRaises(AssertionError):
                scores = evaluator.batch_run_evaluation_episodes(
                    vec_env, agent,
                    n_steps=self.n_timesteps,
                    n_episodes=self.n_episodes)
        else:
            # First Env:  [1 2 (3_a) 5 6 (7_a)]
            # Second Env: [(1) (3_b) 5 6 (7_b)]
            scores = evaluator.batch_run_evaluation_episodes(
                vec_env, agent,
                n_steps=self.n_timesteps,
                n_episodes=self.n_episodes)
            if self.n_timesteps == 2:
                self.assertAlmostEqual(len(scores), 1)
                self.assertAlmostEqual(scores[0], 0.1)
                self.assertEqual(agent.batch_observe.call_count, 2)
            else:
                self.assertAlmostEqual(len(scores), 3)
                self.assertAlmostEqual(scores[0], 0.3)
                self.assertAlmostEqual(scores[1], 2.0)
                self.assertAlmostEqual(scores[2], 3.0)
            # batch_reset should be all True
            self.assertTrue(all(agent.batch_observe.call_args[0][3]))
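These snippets are test methods lifted out of their classes, so the imports they rely on are not shown. A minimal sketch of what they appear to assume, following ChainerRL's module layout (the exact import lines in the original test files may differ):

import logging
from unittest import mock

import chainer
import numpy as np

import chainerrl
from chainerrl.experiments import evaluator
from chainerrl.experiments import train_agent_batch_with_evaluation
from chainerrl.experiments.evaluator import batch_run_evaluation_episodes
from chainerrl.misc import random_seed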
Example #2
    def _test_abc_batch(self,
                        steps=100000,
                        require_success=True,
                        gpu=-1,
                        load_model=False,
                        num_envs=4):

        if self.recurrent and gpu >= 0:
            self.skipTest(
                'NStepLSTM does not support double backprop with GPU.')
        if self.recurrent and chainer.__version__ == '7.0.0b3':
            self.skipTest(
                'chainer==7.0.0b3 has a bug in double backprop of LSTM.'
                ' See https://github.com/chainer/chainer/pull/8037')

        env, _ = self.make_vec_env_and_successful_return(test=False,
                                                         num_envs=num_envs)
        test_env, successful_return = self.make_vec_env_and_successful_return(
            test=True, num_envs=num_envs)
        agent = self.make_agent(env, gpu)
        max_episode_len = None if self.episodic else 2

        if load_model:
            print('Load agent from', self.agent_dirname)
            agent.load(self.agent_dirname)

        # Train
        train_agent_batch_with_evaluation(
            agent=agent,
            env=env,
            steps=steps,
            outdir=self.tmpdir,
            eval_interval=200,
            eval_n_steps=None,
            eval_n_episodes=40,
            successful_score=successful_return,
            eval_env=test_env,
            log_interval=100,
            max_episode_len=max_episode_len,
        )
        env.close()

        # Test
        n_test_runs = 10
        eval_returns = batch_run_evaluation_episodes(
            test_env,
            agent,
            n_steps=None,
            n_episodes=n_test_runs,
            max_episode_len=max_episode_len,
        )
        test_env.close()
        if require_success:
            n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
            self.assertEqual(n_succeeded, n_test_runs)

        # Save
        agent.save(self.agent_dirname)
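Stripped of the test scaffolding, Examples #2 and #3 follow the same train-then-evaluate flow. A compact sketch of that flow; train_and_evaluate is a hypothetical helper name, and agent, train_env, and eval_env stand for an already-constructed ChainerRL batch agent and vector environments:

from chainerrl.experiments import train_agent_batch_with_evaluation
from chainerrl.experiments.evaluator import batch_run_evaluation_episodes


def train_and_evaluate(agent, train_env, eval_env, outdir, steps,
                       eval_n_episodes=40, max_episode_len=None):
    # Train with periodic evaluation on eval_env every eval_interval steps.
    train_agent_batch_with_evaluation(
        agent=agent,
        env=train_env,
        steps=steps,
        outdir=outdir,
        eval_interval=200,
        eval_n_steps=None,
        eval_n_episodes=eval_n_episodes,
        eval_env=eval_env,
        max_episode_len=max_episode_len,
    )
    train_env.close()
    # Final evaluation: one return per finished episode.
    returns = batch_run_evaluation_episodes(
        eval_env, agent, n_steps=None,
        n_episodes=eval_n_episodes, max_episode_len=max_episode_len)
    eval_env.close()
    return returns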
Example #3
    def _test_abc_batch(self,
                        steps=100000,
                        require_success=True,
                        gpu=-1,
                        load_model=False,
                        num_envs=4):

        env, _ = self.make_vec_env_and_successful_return(test=False,
                                                         num_envs=num_envs)
        test_env, successful_return = self.make_vec_env_and_successful_return(
            test=True, num_envs=num_envs)
        agent = self.make_agent(env, gpu)
        max_episode_len = None if self.episodic else 2

        if load_model:
            print('Load agent from', self.agent_dirname)
            agent.load(self.agent_dirname)

        # Train
        train_agent_batch_with_evaluation(
            agent=agent,
            env=env,
            steps=steps,
            outdir=self.tmpdir,
            eval_interval=200,
            eval_n_steps=None,
            eval_n_episodes=40,
            successful_score=successful_return,
            eval_env=test_env,
            log_interval=100,
            max_episode_len=max_episode_len,
        )
        env.close()

        # Test
        n_test_runs = 10
        eval_returns = batch_run_evaluation_episodes(
            test_env,
            agent,
            n_steps=None,
            n_episodes=n_test_runs,
            max_episode_len=max_episode_len,
        )
        test_env.close()
        if require_success:
            n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
            self.assertEqual(n_succeeded, n_test_runs)

        # Save
        agent.save(self.agent_dirname)
Example #4
    def _test_batch_training(self,
                             gpu,
                             steps=5000,
                             load_model=False,
                             require_success=True):

        random_seed.set_random_seed(1)
        logging.basicConfig(level=logging.DEBUG)

        env, _ = self.make_vec_env_and_successful_return(test=False)
        test_env, successful_return = self.make_vec_env_and_successful_return(
            test=True)
        agent = self.make_agent(env, gpu)

        if load_model:
            print('Load agent from', self.agent_dirname)
            agent.load(self.agent_dirname)
            agent.replay_buffer.load(self.rbuf_filename)

        # Train
        train_agent_batch_with_evaluation(
            agent=agent,
            env=env,
            steps=steps,
            outdir=self.tmpdir,
            eval_interval=200,
            eval_n_steps=None,
            eval_n_episodes=5,
            successful_score=1,
            eval_env=test_env,
        )
        env.close()

        # Test
        n_test_runs = 5
        eval_returns = batch_run_evaluation_episodes(
            test_env,
            agent,
            n_steps=None,
            n_episodes=n_test_runs,
        )
        test_env.close()
        n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
        if require_success:
            self.assertEqual(n_succeeded, n_test_runs)

        # Save
        agent.save(self.agent_dirname)
        agent.replay_buffer.save(self.rbuf_filename)
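Example #4 additionally persists the replay buffer next to the agent. A minimal sketch of that round trip, assuming the agent exposes a replay_buffer attribute with save/load methods, as the agents this test targets do; the helper names are mine:

def save_agent_and_buffer(agent, agent_dirname, rbuf_filename):
    # Model and optimizer state go to a directory, the replay buffer to a file.
    agent.save(agent_dirname)
    agent.replay_buffer.save(rbuf_filename)


def load_agent_and_buffer(agent, agent_dirname, rbuf_filename):
    agent.load(agent_dirname)
    agent.replay_buffer.load(rbuf_filename)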
Example #5
    def _test_abc(self,
                  steps=1000000,
                  require_success=True,
                  gpu=-1,
                  load_model=False):

        env, _ = self.make_env_and_successful_return(test=False,
                                                     n=self.num_processes)
        test_env, successful_return = self.make_env_and_successful_return(
            test=True, n=1)
        agent = self.make_agent(env, gpu)

        if load_model:
            print('Load agent from', self.agent_dirname)
            agent.load(self.agent_dirname)

        # Train
        chainerrl.experiments.train_agent_batch_with_evaluation(
            agent=agent,
            env=env,
            steps=steps,
            outdir=self.tmpdir,
            log_interval=10,
            eval_interval=200,
            eval_n_steps=None,
            eval_n_episodes=50,
            successful_score=1,
            eval_env=test_env,
        )
        env.close()

        # Test
        n_test_runs = 100
        eval_returns = batch_run_evaluation_episodes(
            test_env,
            agent,
            n_steps=None,
            n_episodes=n_test_runs,
        )
        test_env.close()
        n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
        if require_success:
            self.assertGreater(n_succeeded, 0.8 * n_test_runs)

        # Save
        agent.save(self.agent_dirname)
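Examples #2 through #5 end with the same success check: count how many evaluation returns reach the known successful return. A small helper that captures the pattern (success_rate is my name, not ChainerRL's):

import numpy as np


def success_rate(eval_returns, successful_return):
    # Fraction of evaluation episodes whose return reaches the threshold.
    return float(np.mean(np.asarray(eval_returns) >= successful_return))

Example #5 then requires this fraction to exceed 0.8, while the stricter batch tests above require every evaluation run to succeed.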
Example #6
    def test_needs_reset(self):
        agent = mock.Mock()
        agent.batch_act.side_effect = [[1, 1]] * 5

        def make_env(idx):
            env = mock.Mock()
            if idx == 0:
                # First episode: 0 -> 1 -> 2 -> 3 (reset)
                # Second episode: 4 -> 5 -> 6 -> 7 (done)
                env.reset.side_effect = [('state', 0), ('state', 4)]
                env.step.side_effect = [
                    (('state', 1), 0, False, {}),
                    (('state', 2), 0, False, {}),
                    (('state', 3), 0, False, {'needs_reset': True}),
                    (('state', 5), -0.5, False, {}),
                    (('state', 6), 0, False, {}),
                    (('state', 7), 1, True, {}),
                ]
            else:
                # First episode: 0 -> 1 (reset)
                # Second episode: 2 -> 3 (reset)
                # Third episode: 4 -> 5 -> 6 -> 7 (done)
                env.reset.side_effect = [
                    ('state', 0), ('state', 2), ('state', 4)]
                env.step.side_effect = [
                    (('state', 1), 2, False, {'needs_reset': True}),
                    (('state', 3), 3, False, {'needs_reset': True}),
                    (('state', 5), -0.6, False, {}),
                    (('state', 6), 0, False, {}),
                    (('state', 7), 1, True, {}),
                ]
            return env

        vec_env = chainerrl.envs.SerialVectorEnv(
            [make_env(i) for i in range(2)])

        # First Env: [1 2 (3_a) 5 6 (7_a)]
        # Second Env: [(1) (3_b) 5 6 (7_b)]
        # Results: (1), (3a), (3b), (7b)
        scores = evaluator.batch_run_evaluation_episodes(
            vec_env, agent, n_steps=None, n_episodes=4)
        self.assertAlmostEqual(len(scores), 4)
        self.assertAlmostEqual(scores[0], 0)
        self.assertAlmostEqual(scores[1], 2)
        self.assertAlmostEqual(scores[2], 3)
        self.assertAlmostEqual(scores[3], 0.4)
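The mocks above end some episodes by putting 'needs_reset': True into the info dict rather than setting done, and the expected scores show that the batch evaluator counts such episodes as finished. A hypothetical gym wrapper that produces this signal for a real environment, using the old 4-tuple step API these tests assume:

import gym


class NeedsResetTimeLimit(gym.Wrapper):
    """Truncate episodes by signalling 'needs_reset' in info instead of done."""

    def __init__(self, env, max_steps):
        super().__init__(env)
        self.max_steps = max_steps
        self._t = 0

    def reset(self, **kwargs):
        self._t = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self._t += 1
        if self._t >= self.max_steps and not done:
            info['needs_reset'] = True
        return obs, reward, done, info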