Пример #1
0
class TestCartPoleEnv(unittest.TestCase):
    """Unit tests for the ``reset``, ``step`` and ``obs_dim`` methods of ``CartPoleEnv``.

    All tests except ``test_step_wrong_input`` are currently skipped with the
    reason 'Skipping due to cartpole.out'.
    """

    # Number of steps after which an episode is expected to report ``done``.
    # NOTE(review): inferred from the original ``t != 499`` check in
    # ``test_done_signal_per_episode`` -- confirm against CartPoleEnv.
    EPISODE_LENGTH = 500

    def setUp(self):
        """Create a fresh environment before every test."""
        self.env = CartPoleEnv()

    @unittest.skip('Skipping due to cartpole.out')
    def test_reset_return_type(self):
        """Check if a list is returned"""
        self.assertIsInstance(self.env.reset(), list)

    @unittest.skip('Skipping due to cartpole.out')
    def test_step_return_type(self):
        """Check if a 3-tuple of list, float, and bool is returned"""
        # Reuse the per-test fixture instead of building a second environment.
        self.env.reset()
        obs, reward, done = self.env.step(action=-1)

        self.assertIsInstance(obs, list)
        self.assertIsInstance(reward, float)
        self.assertIsInstance(done, bool)

    @unittest.skip('Skipping due to cartpole.out')
    def test_obs_dim_return_type(self):
        """Check if an integer is returned"""
        self.assertIsInstance(self.env.obs_dim(), int)

    @unittest.skip('Skipping due to cartpole.out')
    def test_reset_return_dims(self):
        """Check if a 4-dimensional list is returned"""
        self.assertEqual(len(self.env.reset()), 4)

    @unittest.skip('Skipping due to cartpole.out')
    def test_obs_dim_return_value(self):
        """Check if 4 is returned"""
        self.env.reset()
        self.assertEqual(self.env.obs_dim(), 4)

    def test_step_wrong_input(self):
        """Check if assertion is raised with wrong input"""
        with self.assertRaises(AssertionError):
            self.env.step(43892.42)

    @unittest.skip('Skipping due to cartpole.out')
    def test_done_signal_per_episode(self):
        """Check if done signal is triggered only at the end of the episode"""
        self.env.reset()

        # The original looped over 10 steps while comparing against step 499,
        # so the in-loop assertFalse and the final assertTrue contradicted
        # each other on the last step.  Run a whole episode instead.
        last_step = self.EPISODE_LENGTH - 1
        done = False
        for t in range(self.EPISODE_LENGTH):
            _, _, done = self.env.step(action=-1)
            if t != last_step:
                # Must be false before the final step
                self.assertFalse(done, 'done raised early at step {}'.format(t))

        # Must be true at the end of the episode
        self.assertTrue(done)
Пример #2
0
def main():
    """Train a linear cart-pole policy with the cross-entropy method.

    Parses the command-line options, then for each episode: samples noisy
    parameter vectors around the current parameters, evaluates them, averages
    the elite ones into the new parameters and runs one episode with the
    updated model.  Progress is written to stderr.  If ``options.output_file``
    is set, the per-episode win ratios (episode reward divided by
    ``options.step_size``) are written as a single CSV row to
    ``<output_file>.csv``.  The environment is always terminated on exit,
    even when an error occurs.
    """
    # Build parser
    parser = build_parser()
    options = parser.parse_args()

    # Set random seed
    random.seed(options.random_seed)

    # Get CEM methods
    cem = CrossEntropyMethod(N=options.n, p=options.p)
    # Create environment object
    env = CartPoleEnv()
    try:
        # Create linear model
        model = LinearModel(dims=env.obs_dim())

        # Initialize parameters
        params = model.params

        # Episode scores (stored as ratios, not percentages)
        win_ratio_list = []

        successful_episodes = 0
        for i_episode in range(options.episodes):
            sys.stderr.write('\n###### Episode {} of {} ###### \n'.format(i_episode+1, options.episodes))

            # Sample N parameter vectors
            noisy_params = cem.sample_parameters(params)
            # Evaluate the sampled vectors
            rewards = [
                noisy_evaluation(model, env, options.step_size, candidate)
                for candidate in noisy_params
            ]
            # Get elite parameters based on reward
            elite_params = cem.get_elite_parameters(noisy_params, rewards)
            # Update parameters
            params = cem.get_parameter_mean(elite_params)
            episode_reward = run_episode(
                model=update_model(model, params),
                env=env,
                steps=options.step_size,
                print_step=options.print_step,
            )
            win_ratio = episode_reward / options.step_size
            # '%' presentation type scales the ratio by 100; the previous
            # '{:.2f}%' printed e.g. "1.00%" for a fully successful episode.
            sys.stderr.write('Episode reward: {} ({:.2%})\n'.format(episode_reward, win_ratio))
            # Save win_ratio
            win_ratio_list.append(win_ratio)

            if episode_reward >= options.step_size:
                successful_episodes += 1

        # Avoid ZeroDivisionError when no episodes were requested.
        success_ratio = successful_episodes / options.episodes if options.episodes else 0.0

        sys.stderr.write('\nFinal params: {}'.format(model.params))
        sys.stderr.write('\nRun finished. {} out of {} episodes ({:.2%}) have a reward of atleast {}\n'.format(successful_episodes, options.episodes, success_ratio, options.step_size))

        # If output_file is given, write scores to disk
        if options.output_file:
            sys.stderr.write('\nWriting scores to file: {}.csv...\n'.format(options.output_file))
            with open(options.output_file + '.csv', 'w', newline='') as f:
                wr = csv.writer(f)
                wr.writerow(win_ratio_list)
            sys.stderr.write('Done!\n')
    finally:
        # Terminate the host program, also when training or writing fails
        env.terminate()
Пример #3
0
 def test_obs_dim_return_value(self):
     """Verify that obs_dim() reports 4 after the environment is reset."""
     expected_dims = 4
     cart_pole = CartPoleEnv()
     cart_pole.reset()
     reported_dims = cart_pole.obs_dim()
     self.assertEqual(reported_dims, expected_dims)