class TestCartPoleEnv(unittest.TestCase):
    """Unit tests for the ``CartPoleEnv`` wrapper.

    Every test except ``test_step_wrong_input`` is currently skipped with
    the reason 'Skipping due to cartpole.out'.
    """

    # One shared skip decorator so the reason string lives in a single place.
    _skip_cartpole = unittest.skip('Skipping due to cartpole.out')

    def setUp(self):
        """Create a fresh environment for each test."""
        self.env = CartPoleEnv()

    @_skip_cartpole
    def test_reset_return_type(self):
        """reset() should hand back a list."""
        initial_obs = self.env.reset()
        self.assertIsInstance(initial_obs, list)

    @_skip_cartpole
    def test_step_return_type(self):
        """step() should hand back a (list, float, bool) triple."""
        fresh_env = CartPoleEnv()
        fresh_env.reset()
        obs, reward, done = fresh_env.step(action=-1)
        # Checked in the same order: observation, reward, done flag.
        for value, expected_type in ((obs, list), (reward, float), (done, bool)):
            self.assertIsInstance(value, expected_type)

    @_skip_cartpole
    def test_obs_dim_return_type(self):
        """obs_dim() should hand back an int."""
        dims = self.env.obs_dim()
        self.assertIsInstance(dims, int)

    @_skip_cartpole
    def test_reset_return_dims(self):
        """reset() should hand back a list with 4 entries."""
        initial_obs = self.env.reset()
        self.assertEqual(len(initial_obs), 4)

    @_skip_cartpole
    def test_obs_dim_return_value(self):
        """obs_dim() should report 4 after a reset."""
        fresh_env = CartPoleEnv()
        fresh_env.reset()
        dims = fresh_env.obs_dim()
        self.assertEqual(dims, 4)

    def test_step_wrong_input(self):
        """step() should raise AssertionError for an invalid action."""
        invalid_action = 43892.42
        with self.assertRaises(AssertionError):
            self.env.step(invalid_action)

    @_skip_cartpole
    def test_done_signal_per_episode(self):
        """Check the done flag during and after a run of steps.

        NOTE(review): the loop runs 10 steps and the ``!= 499`` guard is
        therefore always true, so ``done`` is asserted False on every step,
        including the last one; the final assertion then requires that same
        value to be True. As written the two cannot both hold. Confirm the
        intended episode length before un-skipping this test.
        """
        fresh_env = CartPoleEnv()
        fresh_env.reset()
        for step_index in range(10):
            _obs, _reward, done = fresh_env.step(action=-1)
            if step_index != 499:
                # Must be false within 10 steps
                self.assertFalse(done)
        # Must be true at the end of the episode
        self.assertTrue(done)
def main():
    """Train a linear CartPole policy with the cross-entropy method.

    Parses the command-line options, seeds ``random``, then for each
    episode samples noisy parameter vectors, scores them, averages the
    elite ones into the new parameters and runs one evaluation episode.
    Progress is reported on stderr. If ``options.output_file`` is set, the
    per-episode win ratios are written as one CSV row to
    ``<output_file>.csv``.

    The environment's host program is terminated on every exit path,
    including when an exception propagates out of the training loop.
    """
    # Build parser
    parser = build_parser()
    options = parser.parse_args()

    # Set random seed
    random.seed(options.random_seed)

    # Get CEM methods
    cem = CrossEntropyMethod(N=options.n, p=options.p)

    # Create environment object
    env = CartPoleEnv()
    try:
        # Create linear model
        model = LinearModel(dims=env.obs_dim())

        # Initialize parameters
        params = model.params

        # Episode scores
        win_ratio_list = []
        successful_episodes = 0

        for i_episode in range(options.episodes):
            sys.stderr.write('\n###### Episode {} of {} ###### \n'.format(
                i_episode + 1, options.episodes))

            # Sample N parameter vectors
            noisy_params = cem.sample_parameters(params)

            # Evaluate the sampled vectors
            rewards = [
                noisy_evaluation(model, env, options.step_size, candidate)
                for candidate in noisy_params
            ]

            # Get elite parameters based on reward
            elite_params = cem.get_elite_parameters(noisy_params, rewards)

            # Update parameters
            params = cem.get_parameter_mean(elite_params)
            episode_reward = run_episode(
                model=update_model(model, params),
                env=env,
                steps=options.step_size,
                print_step=options.print_step,
            )

            # win_ratio is a fraction of step_size; '{:.2%}' scales it by
            # 100 for display so the printed number matches the '%' sign.
            win_ratio = episode_reward / options.step_size
            sys.stderr.write('Episode reward: {} ({:.2%})\n'.format(
                episode_reward, win_ratio))

            # Save win_ratio (stored as the raw fraction, not a percentage)
            win_ratio_list.append(win_ratio)
            if episode_reward >= options.step_size:
                successful_episodes += 1

        sys.stderr.write('\nFinal params: {}'.format(model.params))

        # With zero episodes there is nothing to divide by; report 0%.
        if options.episodes > 0:
            success_ratio = successful_episodes / options.episodes
        else:
            success_ratio = 0.0
        sys.stderr.write(
            '\nRun finished. {} out of {} episodes ({:.2%}) have a reward of atleast {}\n'.format(
                successful_episodes, options.episodes, success_ratio,
                options.step_size))

        # If output_file is given, write scores to disk
        if options.output_file:
            sys.stderr.write('\nWriting scores to file: {}.csv...\n'.format(
                options.output_file))
            with open(options.output_file + '.csv', 'w', newline='') as f:
                wr = csv.writer(f)
                wr.writerow(win_ratio_list)
            sys.stderr.write('Done!\n')
    finally:
        # Terminate the host program, even if training failed part-way.
        env.terminate()
def test_obs_dim_return_value(self):
    """obs_dim() should report 4 after a reset."""
    fresh_env = CartPoleEnv()
    fresh_env.reset()
    reported_dims = fresh_env.obs_dim()
    self.assertEqual(reported_dims, 4)