def main():
    task = generate_task('CoolGame-v0',
                         EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                         botA_type=1,
                         botB_type=2)

    random_r1 = build_Random_Agent(task, {}, agent_name='random')
    random_r2 = deepcopy(random_r1)

    mcts_config = {
        'budget': 10,
        'rollout_budget': 1000,
        'selection_phase': 'ucb1',
        'exploration_factor_ucb1': 4  # Might need to tweak this?
    }

    mcts_r1 = build_MCTS_Agent(task, mcts_config, agent_name='P1: MCTS')
    mcts_r2 = build_MCTS_Agent(task, mcts_config, agent_name='P2: MCTS')

    human_r1 = HumanAgent(task.action_dim, name='P1')
    human_r2 = HumanAgent(task.action_dim, name='P2')

    # t = task.run_episode([mcts_r1, mcts_r2], training=False, render_mode='rgb', save_gif=True)
    t = task.run_episode([mcts_r1, mcts_r2], training=False)
    print(t)
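The `selection_phase: 'ucb1'` entry in the MCTS config above refers to the standard UCB1 selection rule, with `exploration_factor_ucb1` as its exploration constant. A minimal sketch of that score, assuming each tree node tracks a visit count and an accumulated value (the function and argument names below are illustrative, not the library's actual internals):

import math

def ucb1_score(child_value_sum, child_visits, parent_visits, exploration_factor=4):
    # Unvisited children are selected first
    if child_visits == 0:
        return float('inf')
    exploitation = child_value_sum / child_visits
    exploration = exploration_factor * math.sqrt(math.log(parent_visits) / child_visits)
    return exploitation + exploration

During the selection phase the child with the highest score is followed; a larger exploration factor biases the search towards less-visited children.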
Example No. 2
def test_train_apprentice_using_dagger_against_random_connect4(Connect4Task, expert_iteration_config_dict, mcts_config_dict):
    # Training-worthy parameters
    expert_iteration_config_dict['use_apprentice_in_expert'] = False
    expert_iteration_config_dict['games_per_iteration'] = 10

    expert_iteration_config_dict['mcts_budget'] = 500
    expert_iteration_config_dict['mcts_rollout_budget'] = 100
    expert_iteration_config_dict['initial_memory_size'] = 10000
    expert_iteration_config_dict['memory_size_increase_frequency'] = 5
    expert_iteration_config_dict['end_memory_size'] = 30000
    expert_iteration_config_dict['use_dirichlet'] = False

    expert_iteration_config_dict['learning_rate'] = 1.0e-2
    expert_iteration_config_dict['batch_size'] = 256
    expert_iteration_config_dict['num_epochs_per_iteration'] = 4
    expert_iteration_config_dict['residual_connections'] = [(1, 2), (2, 3), (3, 4)]

    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='ExIt-test')
    summary_writer = SummaryWriter('expert_iteration_test')
    ex_it.algorithm.summary_writer = summary_writer

    random_agent = build_Random_Agent(Connect4Task, mcts_config_dict, agent_name='Random')

    parallel_learn_against_fix_opponent(ex_it,
            fixed_opponent=random_agent,
            agent_position=0,
            task=Connect4Task,
            training_episodes=5000,
            test_episodes=100,
            benchmarking_episodes=20,
            benchmark_every_n_episodes=500,
            reward_tolerance=0.2,
            maximum_average_reward=1.0,
            evaluation_method='last',
            show_progress=True,
            summary_writer=summary_writer)
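`parallel_learn_against_fix_opponent` above drives a train-then-benchmark loop against a frozen opponent. A minimal single-environment sketch of that idea for a two-player task, reusing only `run_episode` and `trajectory.winner` from the other examples; the function body and its defaults are illustrative, not the library's implementation:

def learn_against_fixed_opponent_sketch(agent, fixed_opponent, task,
                                        agent_position=0,
                                        training_episodes=5000,
                                        benchmark_every_n_episodes=500,
                                        benchmarking_episodes=20):
    # Place the learning agent at the requested position in a two-player agent vector
    agent_vector = [fixed_opponent, fixed_opponent]
    agent_vector[agent_position] = agent

    benchmark_winrates = []
    for episode in range(1, training_episodes + 1):
        # Learning happens inside run_episode when training=True
        task.run_episode(agent_vector, training=True)
        if episode % benchmark_every_n_episodes == 0:
            # Periodically measure the winrate with learning switched off
            wins = sum(task.run_episode(agent_vector, training=False).winner == agent_position
                       for _ in range(benchmarking_episodes))
            benchmark_winrates.append(wins / benchmarking_episodes)
    return benchmark_winrates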
Example No. 3
def test_can_collect_one_hot_encoded_opponent_action_multi_env(Connect4Task, expert_iteration_config_dict):
    expert_iteration_config_dict['use_agent_modelling'] = True
    expert_iteration_config_dict['request_observed_action'] = True
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='ExIt-opponent_modelling-test')
    assert ex_it.requires_opponents_prediction

    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    _ = Connect4Task.run_episodes(
        agent_vector=[ex_it, random_agent],
        training=True,  # Required for the ExIt agent to call handle_experience
        num_envs=2, num_episodes=2)
    # We only check for the existence of the keys, rather than their content
    assert 'opponent_policy' in ex_it.algorithm.memory.keys
    assert 'opponent_s' in ex_it.algorithm.memory.keys
    # The memory should hold one opponent prediction per stored state
    assert len(ex_it.algorithm.memory.opponent_policy) == len(ex_it.algorithm.memory.s)
    assert len(ex_it.algorithm.memory.opponent_policy) == len(ex_it.algorithm.memory.opponent_s)

    for opponent_action in ex_it.algorithm.memory.opponent_policy:
        # NaN entries mark timesteps where no opponent action was observed
        if torch.any(torch.isnan(opponent_action)):
            continue
        # Otherwise the encoding is one-hot: a single 1, all other elements are 0
        values, counts = opponent_action.unique(return_counts=True)
        assert torch.equal(torch.Tensor([0, 1]), values.float())
        assert torch.equal(torch.Tensor([Connect4Task.action_dim - 1, 1]), counts.float())
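Each stored opponent "policy" in this test is expected to be a one-hot encoding of the action that was actually observed, with NaN entries for timesteps where no opponent action was recorded. A minimal sketch of building such an encoding, assuming the action is an integer index (an illustrative helper, not part of the library):

import torch

def one_hot_action(action_index: int, action_dim: int) -> torch.Tensor:
    # A single 1 at the taken action, 0 everywhere else,
    # matching the unique-value check in the test above
    encoding = torch.zeros(action_dim)
    encoding[action_index] = 1.
    return encoding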
Example No. 4
def test_random_agent_can_act_on_single_agent_env(CartPoleTask):
    action_space = CartPoleTask.env.action_space

    agent = build_Random_Agent(CartPoleTask, {}, 'RandomTest')
    trajectory = CartPoleTask.run_episode([agent], training=False)
    assert all(
        map(lambda a: action_space.contains(a),
            extract_actions_from_trajectory(trajectory)))
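`extract_actions_from_trajectory` is assumed here to return the action taken at every timestep of the trajectory. A minimal sketch under that assumption (the `action` field is hypothetical, mirroring the `done` / `observation` timestep fields used in later examples):

def extract_actions_from_trajectory(trajectory):
    # One action per timestep, in order
    return [timestep.action for timestep in trajectory]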
Example No. 5
def test_integration_random_agent_rock_paper_scissors(RPSTask):
    population = [
        build_Random_Agent(RPSTask, {}, 'Test-1'),
        build_Random_Agent(RPSTask, {}, 'Test-2')
    ]
    winrate_matrix_metagame = compute_winrate_matrix_metagame(
        population=population,
        episodes_per_matchup=5,
        task=RPSTask,
        num_envs=1)

    # Diagonal winrates are all 0.5
    np.testing.assert_allclose(
        winrate_matrix_metagame.diagonal(),
        np.full(winrate_matrix_metagame.diagonal().shape, 0.5))
    # a_i,j + a_j,i = 1 for all non diagonal entries
    for i, j in zip(*np.triu_indices_from(winrate_matrix_metagame, k=1)):
        complementary_sum = winrate_matrix_metagame[
            i, j] + winrate_matrix_metagame[j, i]
        np.testing.assert_allclose(complementary_sum, 1.)
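The two assertions above capture the defining properties of a winrate matrix for a symmetric zero-sum game: self-play winrates sit at 0.5 and off-diagonal entries are complementary. A tiny worked example with made-up numbers:

import numpy as np

winrate_matrix = np.array([[0.50, 0.47],
                           [0.53, 0.50]])
assert np.allclose(winrate_matrix.diagonal(), 0.5)          # Diagonal winrates are all 0.5
assert np.allclose(winrate_matrix + winrate_matrix.T, 1.)   # a_i,j + a_j,i = 1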
Example No. 6
def test_can_defeat_random_play_in_connect4_both_positions_single_env(Connect4Task, expert_iteration_config_dict):
    expert_iteration_config_dict['mcts_budget'] = 100
    expert_iteration_config_dict['mcts_rollout_budget'] = 20
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='MCTS1-test')

    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    trajectory = Connect4Task.run_episode([ex_it, random_agent], training=False)
    assert trajectory.winner == 0  # First player (index 0) has a much higher budget

    trajectory = Connect4Task.run_episode([random_agent, ex_it], training=False)
    assert trajectory.winner == 1  # Second player (index 1) has a much higher budget
Example No. 7
def test_can_defeat_random_play_in_connect4_both_positions_multi_env(Connect4Task, expert_iteration_config_dict):
    expert_iteration_config_dict['mcts_budget'] = 100
    expert_iteration_config_dict['mcts_rollout_budget'] = 20
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='MCTS1-test')

    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    trajectories = Connect4Task.run_episodes(
            [ex_it, random_agent], training=False, num_envs=4, num_episodes=4)

    assert all(map(lambda t: t.winner == 0, trajectories))  # First player (index 0) has a much higher budget

    trajectories = Connect4Task.run_episodes(
            [random_agent, ex_it], training=False, num_envs=4, num_episodes=4)
    assert all(map(lambda t: t.winner == 1, trajectories))  # Second player (index 1) has a much higher budget
Example No. 8
def test_can_use_data_augmentation_to_double_experiences(Connect4Task, expert_iteration_config_dict):
    expert_iteration_config_dict['state_preprocessing_fn'] = 'turn_into_single_element_batch'
    expert_iteration_config_dict['data_augmnentation_fn'] = {
        'name': 'generate_horizontal_symmetry', 'flip_obs_on_dim': 1
    }
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='ExIt1-test')
    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    trajectories = Connect4Task.run_episodes(agent_vector=[ex_it, random_agent],
                              num_envs=2, num_episodes=1, training=True)
    # TODO: finish this test. Data augmentation is configured above and an
    # episode against a random opponent has been run. Remaining checks:
    # - The number of datapoints in storage is twice the number of
    #   datapoints gathered from the episode itself
    # - There is a single "done" flag in the storage
    #   (i.e. the agent counts only 1 finished episode)
    pass
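For reference, a minimal sketch of the kind of horizontal-symmetry augmentation the config above asks for, assuming the observation is a board tensor whose columns lie on `flip_obs_on_dim` and the policy vector indexes columns left to right. The function name mirrors the config entry, but the body is illustrative rather than the library's implementation:

import torch

def generate_horizontal_symmetry(observation: torch.Tensor,
                                 policy: torch.Tensor,
                                 flip_obs_on_dim: int = 1):
    # Mirroring the board and the per-column policy yields a second,
    # equally valid datapoint, doubling the stored experiences
    flipped_observation = torch.flip(observation, dims=[flip_obs_on_dim])
    flipped_policy = torch.flip(policy, dims=[0])
    return flipped_observation, flipped_policy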
Example No. 9
def test_can_collect_opponent_action_distributions_multi_env(Connect4Task, expert_iteration_config_dict):
    expert_iteration_config_dict['use_agent_modelling'] = True
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='ExIt-opponent_modelling-test')
    assert ex_it.requires_opponents_prediction

    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    _ = Connect4Task.run_episodes(
        agent_vector=[ex_it, random_agent],
        training=True,  # Required for the ExIt agent to call handle_experience
        num_envs=2, num_episodes=2)
    # We only check for the existence of the keys, rather than their content
    assert 'opponent_policy' in ex_it.algorithm.memory.keys
    assert 'opponent_s' in ex_it.algorithm.memory.keys
    # The memory should hold one opponent prediction per stored state
    assert len(ex_it.algorithm.memory.opponent_policy) == len(ex_it.algorithm.memory.s)
    assert len(ex_it.algorithm.memory.opponent_policy) == len(ex_it.algorithm.memory.opponent_s)
Example No. 10
def test_singleagent_tasks_run_faster_on_parallel(env_name):
    task = generate_task(env_name, EnvType.SINGLE_AGENT)
    random_agent = build_Random_Agent(task, {}, 'Test-Random')

    num_episodes = 50
    num_envs = 1
    start = time.time()
    trajectories = task.run_episodes([random_agent],
                                     num_episodes=num_episodes,
                                     num_envs=num_envs,
                                     training=False)
    total_single = time.time() - start

    start = time.time()
    num_envs = multiprocessing.cpu_count()
    trajectories = task.run_episodes([random_agent],
                                     num_episodes=num_episodes,
                                     num_envs=num_envs,
                                     training=False)
    total_multiple = time.time() - start

    assert total_multiple < total_single
Example No. 11
def test_multiagent_sequential_tasks_run_faster_on_parallel(env_name):
    task = generate_task(env_name, EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    random_agent = build_Random_Agent(task, {}, 'Test-Random')

    start = time.time()
    num_episodes = 100
    num_envs = 1
    _ = task.run_episodes([random_agent, random_agent],
                          num_episodes=num_episodes,
                          num_envs=num_envs,
                          training=False)
    total_single = time.time() - start

    start = time.time()
    num_envs = multiprocessing.cpu_count()
    _ = task.run_episodes([random_agent, random_agent],
                          num_episodes=num_episodes,
                          num_envs=num_envs,
                          training=False)
    total_multiple = time.time() - start
    print('Parallel: ', total_multiple, 'Sequential: ', total_single, 'Diff: ',
          total_single - total_multiple)
    assert total_multiple < total_single
Example No. 12
def run_parallel_task_with_random_agent(env_name,
                                        env_type,
                                        num_envs,
                                        num_episodes,
                                        model_based_agents=False):
    task = generate_task(env_name, env_type)
    # Weak baseline agents: either near-random MCTS or uniformly random agents
    if model_based_agents:
        mcts_config = {
            'budget': 1,
            'rollout_budget': 0,
            'use_dirichlet': False,
            'dirichlet_alpha': 1,
            'selection_phase': 'ucb1',
            'exploration_factor_ucb1': 1,
            'expose_tree_in_predictions': True
        }
        agent_vector = [
            build_MCTS_Agent(task, mcts_config, 'Test-MCTS-Random')
            for _ in range(task.num_agents)
        ]
    else:
        agent_vector = [
            build_Random_Agent(task, {}, 'Test-Random')
            for _ in range(task.num_agents)
        ]

    # The number of environments may differ from the number of
    # episodes because we want to test that we can generate a
    # specific number of trajectories regardless of the number
    # of environments used to generate them
    trajectories = task.run_episodes(agent_vector,
                                     num_episodes=num_episodes,
                                     num_envs=num_envs,
                                     training=True,
                                     store_extra_information=True)

    # We have roughly the number of trajectories we asked for:
    # lower-bounded by :param: num_episodes, but possibly a few more
    # because multiple environments can finish at the same time
    assert (len(trajectories) >= num_episodes) and (len(trajectories) <=
                                                    (num_episodes + num_envs))

    # All trajectories finish with a "done" flag
    assert all([t[-1].done for t in trajectories])

    # No timestep except the last one in each trajectory has its "done" flag set
    for t in trajectories:
        assert all([not timestep.done for timestep in t[:-1]])

    # ASSUMPTION: observation and succ_observation are numpy arrays
    if env_type == EnvType.SINGLE_AGENT:
        # Each timestep's succ_observation equals the next timestep's observation
        assert all([(ex_1.succ_observation == ex_2.observation).all()
                    for t in trajectories for ex_1, ex_2 in zip(t, t[1:])])
    else:
        # For every agent, each timestep's succ_observation equals
        # that agent's observation at the next timestep
        assert all([
            (ex_1.succ_observation[a_i] == ex_2.observation[a_i]).all()
            for t in trajectories for ex_1, ex_2 in zip(t, t[1:])
            for a_i in range(task.num_agents)
        ])