# NOTE: import locations below are assumed (regym-style project layout);
# RPSTask, pendulum_task, RPSTask2Repetitions and ppo_config_dict are pytest
# fixtures, and the fixed-strategy rockAgent/paperAgent/scissorsAgent are
# expected to be provided by the surrounding test suite (e.g. conftest.py).
from copy import deepcopy

import numpy as np
import pytest

from regym.environments import generate_task, EnvType
from regym.evaluation import benchmark_agents_on_tasks
from regym.rl_algorithms import build_PPO_Agent, build_MCTS_Agent

def test_all_tasks_must_be_single_agent_or_multiagent(RPSTask, pendulum_task,
                                                      ppo_config_dict):
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'Test-PPO')
    with pytest.raises(ValueError) as _:
        _ = benchmark_agents_on_tasks(tasks=[RPSTask, pendulum_task],
                                      agents=[agent],
                                      num_episodes=1)
def test_if_populate_all_agents_is_set_a_single_agent_must_be_provided(
        RPSTask, ppo_config_dict):
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'Test-PPO')
    with pytest.raises(ValueError) as _:
        _ = benchmark_agents_on_tasks(tasks=[RPSTask],
                                      agents=[agent, agent],
                                      num_episodes=1,
                                      populate_all_agents=True)
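# Hedged sketch of the argument validation the two tests above rely on. This is
# not the library's actual implementation; the helper name and the
# task.env_type attribute access are assumptions made for illustration only.
def _validate_benchmark_arguments(tasks, agents, populate_all_agents):
    single_agent_flags = {task.env_type == EnvType.SINGLE_AGENT
                          for task in tasks}
    if len(single_agent_flags) > 1:
        raise ValueError('Tasks must be all single-agent or all multiagent')
    if populate_all_agents and len(agents) != 1:
        raise ValueError('populate_all_agents=True requires exactly one agent')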
def test_single_agent_can_populate_all_agents(RPSTask):
    expected_winrates = [0.5]

    actual_winrates = benchmark_agents_on_tasks(tasks=[RPSTask],
                                                agents=[rockAgent],
                                                num_episodes=200,
                                                populate_all_agents=True)
    np.testing.assert_allclose(expected_winrates, actual_winrates, atol=0.1)
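# Rough justification for atol=0.1 over 200 episodes (an assumption-laden
# back-of-the-envelope check, not part of the test): if each episode outcome
# behaved like an independent win/loss draw with probability 0.5 (e.g. ties
# split at random), the standard error of the empirical winrate would be
# about 0.035, so 0.1 is close to a 3-sigma band.
def _winrate_standard_error(win_probability: float = 0.5,
                            episodes: int = 200) -> float:
    return (win_probability * (1 - win_probability) / episodes) ** 0.5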
def generate_evaluation_matrix(cool_game_params, logger):
    # 0: SawBot 1: TorchBot 2: NailBot
    benchmarking_episodes = 1
    mcts_budget = 1

    saw_vs_torch_task = generate_task('CoolGame-v0',
                                      EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                      botA_type=0,
                                      botB_type=1,
                                      **cool_game_params)
    saw_vs_nail_task = generate_task('CoolGame-v0',
                                     EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                     botA_type=0,
                                     botB_type=2,
                                     **cool_game_params)
    torch_vs_nail_task = generate_task('CoolGame-v0',
                                       EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                       botA_type=1,
                                       botB_type=2,
                                       **cool_game_params)

    mcts_config = {'budget': mcts_budget}
    mcts_agent = build_MCTS_Agent(saw_vs_torch_task,
                                  mcts_config,
                                  agent_name='MCTS agent')

    saw_winrates = benchmark_agents_on_tasks(
        tasks=[saw_vs_torch_task, saw_vs_nail_task],
        agents=[mcts_agent],
        populate_all_agents=True,
        num_episodes=benchmarking_episodes)
    nail_winrate = benchmark_agents_on_tasks(
        tasks=[torch_vs_nail_task],
        agents=[mcts_agent],
        populate_all_agents=True,
        num_episodes=benchmarking_episodes)

    bench_msg = f'episodes={benchmarking_episodes} MCTS_budget={mcts_budget}'
    winrates_msg = f'winrates=saw:{saw_winrates} nail:{nail_winrate}'
    logger.info(bench_msg)
    logger.info(winrates_msg)
    logger.info(f'params={cool_game_params}')
    # Antisymmetric evaluation matrix over (0: SawBot, 1: TorchBot, 2: NailBot);
    # entry [i][j] is row-bot i's winrate against column-bot j, with the lower
    # triangle mirrored by negation.
    return np.array([[0., saw_winrates[0], saw_winrates[1]],
                     [-saw_winrates[0], 0., nail_winrate[0]],
                     [-saw_winrates[1], -nail_winrate[0], 0.]])
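# A minimal sketch of how the evaluation matrix above could be consumed, e.g.
# to rank the three bots by average payoff. This helper is illustrative only
# and is not part of the benchmarking code; it assumes nothing beyond numpy
# and the row ordering used above (0: SawBot, 1: TorchBot, 2: NailBot).
def rank_bots_by_mean_payoff(evaluation_matrix):
    bot_names = ['SawBot', 'TorchBot', 'NailBot']
    mean_payoffs = np.asarray(evaluation_matrix).mean(axis=1)
    ranking = np.argsort(mean_payoffs)[::-1]  # highest mean payoff first
    return [(bot_names[i], float(mean_payoffs[i])) for i in ranking]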
def test_can_compute_winrate_for_player1_multiagent_task(RPSTask):
    expected_winrates = [0, 1]

    vs_paper = deepcopy(RPSTask)
    vs_scissors = deepcopy(RPSTask)

    # Ugly; it would be nicer to express this as a one-liner
    vs_paper.extend_task(agents={1: paperAgent})
    vs_scissors.extend_task(agents={1: scissorsAgent})

    actual_winrates = benchmark_agents_on_tasks(tasks=[vs_paper, vs_scissors],
                                                agents=[rockAgent],
                                                num_episodes=10)
    np.testing.assert_array_equal(expected_winrates, actual_winrates)
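# Why the expected winrates are exactly [0, 1]: the agents above play fixed
# pure strategies, so every episode has the same outcome. A tiny reference
# payoff function (illustrative only, independent of the environment code)
# makes that expectation explicit.
def rps_payoff(move_a: str, move_b: str) -> int:
    # Returns +1 if move_a beats move_b, -1 if it loses, 0 on a tie.
    beats = {'rock': 'scissors', 'paper': 'rock', 'scissors': 'paper'}
    if move_a == move_b:
        return 0
    return 1 if beats[move_a] == move_b else -1
# rps_payoff('rock', 'paper') == -1   (rockAgent never wins vs paperAgent)
# rps_payoff('rock', 'scissors') == 1 (rockAgent always wins vs scissorsAgent)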
def test_can_compute_cumulative_reward_for_agent_single_agent_task(
        RPSTask2Repetitions):
    vs_paper = deepcopy(RPSTask2Repetitions)
    vs_scissors = deepcopy(RPSTask2Repetitions)
    vs_paper.extend_task(agents={1: paperAgent})
    vs_scissors.extend_task(agents={1: scissorsAgent})

    expected_cumulative_rewards = [-2., 2.]

    actual_winrates, actual_cumulative_rewards = benchmark_agents_on_tasks(
        tasks=[vs_paper, vs_scissors],
        agents=[rockAgent],
        keep_cumulative_rewards=True,
        num_episodes=10)
    np.testing.assert_array_equal(expected_cumulative_rewards,
                                  actual_cumulative_rewards)
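# Why the expected cumulative rewards are [-2., 2.]: with 2 repetitions per
# episode and deterministic pure strategies, rockAgent loses both rounds
# against paperAgent (2 * -1) and wins both against scissorsAgent (2 * +1).
# A small illustrative check, reusing the rps_payoff sketch above.
def expected_cumulative_reward(move_a: str, move_b: str,
                               repetitions: int = 2) -> int:
    return repetitions * rps_payoff(move_a, move_b)
# expected_cumulative_reward('rock', 'paper') == -2
# expected_cumulative_reward('rock', 'scissors') == 2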