def test_train(self):
    """Train a RandomAgent on 'CartPole-v0' with every backend from get_backends.

    Each run uses 10 iterations with at most 100 steps per episode and no
    default plots. The evaluation entry stored under the final
    ``episodes_done_in_training`` key must report an average of at least
    10 steps.
    """
    for current_backend in get_backends(RandomAgent):
        agent = RandomAgent('CartPole-v0', backend=current_backend)
        # Fresh callback instances for every backend run.
        callbacks = [log.Duration(), log.Iteration()]
        train_context: core.TrainContext = agent.train(
            callbacks,
            num_iterations=10,
            max_steps_per_episode=100,
            default_plots=False,
        )
        final_key = train_context.episodes_done_in_training
        # The entry is a (min, avg, max) triple; only the average is checked.
        _, mean_steps, _ = train_context.eval_steps[final_key]
        assert mean_steps >= 10
def test_score(self):
    """Score a RandomAgent on 'CartPole-v0' over 5 episodes and sanity-check the result.

    The returned statistics must be internally consistent: min <= mean <= max,
    a non-negative standard deviation, and one reward per episode played.
    """
    episode_count = 5
    agent = RandomAgent('CartPole-v0')
    mean, std, lowest, highest, rewards = agent.score(num_episodes=episode_count)

    assert lowest <= highest
    assert lowest <= mean <= highest
    assert std >= 0
    assert len(rewards) == episode_count
def test_train(self):
    """Train a RandomAgent on the line-world env with every backend from get_backends.

    Each run uses 10 iterations with at most 100 steps per episode and no
    default plots. The value returned by ``max_avg_rewards`` for the
    resulting train context must be non-negative.
    """
    for current_backend in get_backends(RandomAgent):
        agent = RandomAgent(_line_world_name, backend=current_backend)
        # Fresh callback instances for every backend run.
        callbacks = [log.Duration(), log.Iteration()]
        train_context: core.TrainContext = agent.train(
            callbacks,
            num_iterations=10,
            max_steps_per_episode=100,
            default_plots=False,
        )
        best_reward = max_avg_rewards(train_context)
        assert best_reward >= 0
def test_evaluate(self):
    """Evaluate a RandomAgent on 'CartPole-v0' for 5 episodes.

    Both the ``steps`` and the ``rewards`` metric of the evaluation result
    are passed to ``assert_properties_for_metric`` together with the number
    of episodes played.
    """
    episode_count = 5
    agent = RandomAgent('CartPole-v0')
    result = agent.evaluate(num_episodes=episode_count)

    assert_properties_for_metric(result.steps, episode_count)
    assert_properties_for_metric(result.rewards, episode_count)
def test_seed(self):
    """Check that ``agents.seed`` is copied into a new agent's model config.

    The module-level ``agents.seed`` is temporarily set to 123, a RandomAgent
    is created on the line-world environment, and its ``_model_config.seed``
    must equal 123. The previous seed is always restored afterwards.
    """
    oldseed = agents.seed
    agents.seed = 123
    try:
        random_agent = RandomAgent(_line_world_name)
        assert random_agent._model_config.seed == 123
    finally:
        # Restore in `finally` so a failing constructor or assertion cannot
        # leak the overridden global seed into tests that run afterwards.
        agents.seed = oldseed
def test_evaluate(self):
    """Evaluate a RandomAgent on the line-world env for 5 episodes.

    Both the ``steps`` and the ``rewards`` metric of the evaluation result
    are checked via ``self.assert_properties_for_metric`` against the number
    of episodes played.
    """
    episode_count = 5
    agent = RandomAgent(_line_world_name)
    result = agent.evaluate(num_episodes=episode_count)

    self.assert_properties_for_metric(result.steps, episode_count)
    self.assert_properties_for_metric(result.rewards, episode_count)
def test_agent_saver_set(self):
    """Check that a fresh RandomAgent has a truthy agent saver on its backend agent's context."""
    agent = RandomAgent(_line_world_name)
    backend_agent = agent._backend_agent
    saver = backend_agent._agent_context._agent_saver
    assert saver