Exemplo n.º 1
0
def test_mcts_can_take_actions_discrete_obvservation_discrete_action(
        Connect4Task, mcts_config_dict):
    """Smoke test: two MCTS agents can play one Connect4 episode.

    Both agents are built from the same ``mcts_config_dict`` fixture. The
    test has no assertions; it passes when agent construction and
    ``run_episode`` (with ``training=False``) raise no exception.
    """
    first_player, second_player = (
        build_MCTS_Agent(Connect4Task, mcts_config_dict, agent_name=label)
        for label in ('MCTS1-test', 'MCTS2-test')
    )
    Connect4Task.run_episode([first_player, second_player], training=False)
Exemplo n.º 2
0
def test_can_defeat_random_play_in_connect4_both_positions_single_env(Connect4Task, expert_iteration_config_dict):
    """An Expert Iteration agent must beat a random agent from either seat.

    The ExIt agent is configured with ``mcts_budget=100`` and
    ``mcts_rollout_budget=20``. One episode is played with it moving first
    and one with it moving second; ``trajectory.winner`` must be the index
    of the ExIt agent in the agent vector each time.
    """
    expert_iteration_config_dict['mcts_budget'] = 100
    expert_iteration_config_dict['mcts_rollout_budget'] = 20
    expert = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='MCTS1-test')

    opponent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    # (agent vector, index of the agent expected to win): the ExIt agent
    # has the much higher search budget, whichever position it plays.
    matchups = (
        ([expert, opponent], 0),
        ([opponent, expert], 1),
    )
    for agent_vector, expected_winner in matchups:
        episode = Connect4Task.run_episode(agent_vector, training=False)
        assert episode.winner == expected_winner
Exemplo n.º 3
0
def test_can_defeat_random_play_in_connect4_both_positions_multi_env(Connect4Task, expert_iteration_config_dict):
    """An Expert Iteration agent must beat a random agent in every episode.

    Same setup as the single-environment variant, but episodes are collected
    through ``run_episodes`` with ``num_envs=4`` and ``num_episodes=4``.
    Every returned trajectory must report the ExIt agent's index as the
    winner, both when it plays first and when it plays second.
    """
    expert_iteration_config_dict['mcts_budget'] = 100
    expert_iteration_config_dict['mcts_rollout_budget'] = 20
    expert = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='MCTS1-test')

    opponent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    # (agent vector, index of the agent expected to win): the ExIt agent
    # has the much higher search budget, whichever position it plays.
    matchups = (
        ([expert, opponent], 0),
        ([opponent, expert], 1),
    )
    for agent_vector, expected_winner in matchups:
        episodes = Connect4Task.run_episodes(
            agent_vector, training=False, num_envs=4, num_episodes=4)
        assert all(episode.winner == expected_winner for episode in episodes)
Exemplo n.º 4
0
def test_can_collect_one_hot_encoded_opponent_action_multi_env(Connect4Task, expert_iteration_config_dict):
    """ExIt with observed-action requests stores one-hot opponent policies.

    With ``use_agent_modelling`` and ``request_observed_action`` enabled, the
    agent plays training episodes against a random agent. Afterwards its
    memory must expose ``opponent_policy`` and ``opponent_s`` with the same
    length as ``s``, and every stored opponent policy that contains no NaN
    must consist of a single 1 with all other entries 0.
    """
    expert_iteration_config_dict['use_agent_modelling'] = True
    expert_iteration_config_dict['request_observed_action'] = True
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='ExIt-opponent_modelling-test')
    assert ex_it.requires_opponents_prediction

    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    # training=True is required for the ExIt agent to `handle_experience`s.
    Connect4Task.run_episodes(
        agent_vector=[ex_it, random_agent],
        training=True,
        num_envs=2, num_episodes=2)

    memory = ex_it.algorithm.memory
    # Only the existence of the keys is checked here, not their content.
    for required_key in ('opponent_policy', 'opponent_s'):
        assert required_key in memory.keys

    stored_policies = len(memory.opponent_policy)
    assert stored_policies == len(memory.s)
    assert stored_policies == len(memory.opponent_s)

    expected_values = torch.Tensor([0, 1])
    for opponent_policy in memory.opponent_policy:
        # Entries containing NaN are skipped rather than validated.
        if torch.any(torch.isnan(opponent_policy)):
            continue
        # One-hot check: the unique values are exactly {0, 1}, with a
        # single 1 and (action_dim - 1) zeros.
        values, counts = opponent_policy.unique(return_counts=True)
        assert torch.equal(expected_values, values.float())
        assert torch.equal(torch.Tensor([Connect4Task.action_dim - 1, 1]), counts.float())
Exemplo n.º 5
0
def test_can_defeat_random_play_in_connect4_both_positions(
        Connect4Task, mcts_config_dict):
    """The MCTS agent built with ``budget=50`` must win from either seat.

    ``mcts1`` is built from the fixture config as given; the config's
    ``budget`` is then set to 50 before ``mcts2`` is built. One episode is
    played in each seating order and ``extract_winner`` must return the
    index at which ``mcts2`` sits.
    """
    weaker = build_MCTS_Agent(Connect4Task,
                              mcts_config_dict,
                              agent_name='MCTS1-test')
    # The budget is raised only after the first agent exists, so that (per
    # the original test's intent) the second agent searches with more budget.
    mcts_config_dict['budget'] = 50
    stronger = build_MCTS_Agent(Connect4Task,
                                mcts_config_dict,
                                agent_name='MCTS2-test')

    # (agent vector, index of the agent expected to win).
    matchups = (
        ([weaker, stronger], 1),
        ([stronger, weaker], 0),
    )
    for agent_vector, expected_winner in matchups:
        episode = Connect4Task.run_episode(agent_vector, training=False)
        assert extract_winner(episode) == expected_winner
Exemplo n.º 6
0
def test_deterministic_agent_can_act_on_multiagent_sequential_environment(
        Connect4Task):
    """Deterministic agents alternate their fixed actions within an episode.

    Two deterministic agents are built with actions 0 and 1. In the
    resulting trajectory the action at timestep ``i`` must equal the action
    of the agent at index ``i % 2``.
    """
    expected_actions = [0, 1]
    players = [
        build_Deterministic_Agent(Connect4Task, {'action': fixed_action}, label)
        for fixed_action, label in zip(
            expected_actions, ('DeterministicTest-1', 'DeterministicTest-2'))
    ]

    episode = Connect4Task.run_episode(players, training=False)

    # Each timestep unpacks into exactly five fields; only the action
    # (second field) is checked.
    for step, (_s, action, _r, _succ_s, _o) in enumerate(episode):
        assert action == expected_actions[step % 2]
Exemplo n.º 7
0
def test_can_use_data_augmentation_to_double_experiences(Connect4Task, expert_iteration_config_dict):
    """Smoke test: ExIt can train with a data augmentation function configured.

    The agent is configured with the 'turn_into_single_element_batch' state
    preprocessing function and the 'generate_horizontal_symmetry' data
    augmentation function, then plays one training episode (two
    environments) against a random agent. The test currently passes when
    that run raises no exception; see the TODO below for the checks that
    are still missing.
    """
    expert_iteration_config_dict['state_preprocessing_fn'] = 'turn_into_single_element_batch'
    # NOTE(review): the key spelling 'data_augmnentation_fn' is kept verbatim;
    # confirm it matches the key the agent builder actually reads.
    expert_iteration_config_dict['data_augmnentation_fn'] = {
        'name': 'generate_horizontal_symmetry', 'flip_obs_on_dim': 1
    }
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='ExIt1-test')
    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    # No debugger breakpoint here: an `ipdb.set_trace()` call would block or
    # fail the test in non-interactive (CI) runs.
    Connect4Task.run_episodes(agent_vector=[ex_it, random_agent],
                              num_envs=2, num_episodes=1, training=True)
    # TODO: turn the following intended checks into assertions:
    #   - the number of datapoints in storage is twice the number of
    #     datapoints collected without augmentation;
    #   - there is a single "done" flag in the storage (i.e. the agent
    #     registers only 1 finished episode).
def test_sequential_trajectories_feature_agent_predictions_single_env(
        Connect4Task):
    """Trajectories carry each agent's prediction in a single-env episode.

    Two deterministic agents (actions 0 and 1) play one episode, with the
    first agent flagged as requiring opponent predictions. The trajectory's
    extra info is compared against the expected per-agent predictions, whose
    ``probs`` entries include a leading batch dimension.
    """
    first_agent = build_Deterministic_Agent(Connect4Task, {'action': 0},
                                            'Col-0-DeterministicAgent')
    # Required: without this flag the predictions are not requested.
    first_agent.requires_opponents_prediction = True
    # NOTE(review): this agent plays column 1 but shares the 'Col-0' name
    # with the first agent; kept verbatim from the original test.
    second_agent = build_Deterministic_Agent(Connect4Task, {'action': 1},
                                             'Col-0-DeterministicAgent')

    episode = Connect4Task.run_episode([first_agent, second_agent],
                                       training=False)

    expected_predictions = [
        {'a': 0, 'probs': [[1., 0., 0., 0., 0., 0., 0.]]},
        {'a': 1, 'probs': [[0., 1., 0., 0., 0., 0., 0.]]},
    ]
    compare_trajectory_extra_info_against_expected(episode,
                                                   expected_predictions)
Exemplo n.º 9
0
def test_deterministic_agent_can_act_on_async_single_agent(Connect4Task):
    """Deterministic agents alternate their fixed actions in every episode.

    Two deterministic agents (actions 0 and 1) play two episodes over two
    environments via ``run_episodes``. In each returned trajectory the
    action at timestep ``i`` must equal the action of the agent at index
    ``i % 2``.
    """
    expected_actions = [0, 1]
    players = [
        build_Deterministic_Agent(Connect4Task, {'action': fixed_action}, label)
        for fixed_action, label in zip(
            expected_actions, ('DeterministicTest-1', 'DeterministicTest-2'))
    ]

    episodes = Connect4Task.run_episodes(players,
                                         training=False,
                                         num_envs=2,
                                         num_episodes=2)

    for episode in episodes:
        # Each timestep unpacks into exactly five fields; only the action
        # (second field) is checked.
        for step, (_s, action, _r, _succ_s, _o) in enumerate(episode):
            assert action == expected_actions[step % 2]
Exemplo n.º 10
0
def test_can_collect_opponent_action_distributions_multi_env(Connect4Task, expert_iteration_config_dict):
    """ExIt with agent modelling stores opponent policies alongside states.

    With ``use_agent_modelling`` enabled, the agent plays training episodes
    against a random agent. Afterwards its memory must expose
    ``opponent_policy`` and ``opponent_s``, each with the same length as
    ``s``.
    """
    expert_iteration_config_dict['use_agent_modelling'] = True
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name='ExIt-opponent_modelling-test')
    assert ex_it.requires_opponents_prediction

    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    # training=True is required for the ExIt agent to `handle_experience`s.
    Connect4Task.run_episodes(
        agent_vector=[ex_it, random_agent],
        training=True,
        num_envs=2, num_episodes=2)

    memory = ex_it.algorithm.memory
    # Only the existence of the keys is checked here, not their content.
    for required_key in ('opponent_policy', 'opponent_s'):
        assert required_key in memory.keys

    stored_policies = len(memory.opponent_policy)
    assert stored_policies == len(memory.s)
    assert stored_policies == len(memory.opponent_s)
def test_sequential_trajectories_feature_agent_predictions_multienv(
        Connect4Task):
    """Trajectories carry each agent's prediction in multi-env episodes.

    Two deterministic agents (actions 0 and 1) play two episodes over two
    environments, with the first agent flagged as requiring opponent
    predictions. Every trajectory's extra info is compared against the
    expected per-agent predictions.
    """
    first_agent = build_Deterministic_Agent(Connect4Task, {'action': 0},
                                            'Col-0-DeterministicAgent')
    # Required: without this flag the predictions are not requested.
    first_agent.requires_opponents_prediction = True
    # NOTE(review): this agent plays column 1 but shares the 'Col-0' name
    # with the first agent; kept verbatim from the original test.
    second_agent = build_Deterministic_Agent(Connect4Task, {'action': 1},
                                             'Col-0-DeterministicAgent')

    episodes = Connect4Task.run_episodes([first_agent, second_agent],
                                         training=False,
                                         num_envs=2,
                                         num_episodes=2)

    # Open question from the original author: the single-env loop yields
    # 'probs' with a batch dimension, while the multi-env loop does not.
    # Does this matter?
    expected_predictions = [
        {'a': 0, 'probs': [1., 0., 0., 0., 0., 0., 0.]},
        {'a': 1, 'probs': [0., 1., 0., 0., 0., 0., 0.]},
    ]

    for episode in episodes:
        compare_trajectory_extra_info_against_expected(episode,
                                                       expected_predictions)
Exemplo n.º 12
0
def test_can_use_apprentice_in_expert_in_expansion_and_rollout_phase(Connect4Task, expert_iteration_config_dict):
    """Smoke test: ExIt agents can play with the apprentice used in the expert.

    Sets ``use_apprentice_in_expert`` to True and ``rollout_budget`` to 0,
    then lets two ExIt agents play one episode. There are no assertions;
    the test passes when nothing raises.
    """
    expert_iteration_config_dict['use_apprentice_in_expert'] = True
    # NOTE(review): other tests configure 'mcts_rollout_budget'; confirm
    # that 'rollout_budget' is the key the agent builder reads.
    expert_iteration_config_dict['rollout_budget'] = 0
    players = [
        build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name=label)
        for label in ('ExIt1-test', 'ExIt2-test')
    ]
    Connect4Task.run_episode(players, training=False)
Exemplo n.º 13
0
def test_expert_iteration_can_take_actions_discrete_obvservation_discrete_action(Connect4Task, expert_iteration_config_dict):
    """Smoke test: two Expert Iteration agents can play one Connect4 episode.

    Both agents are built from the unmodified config fixture. There are no
    assertions; the test passes when construction and ``run_episode`` (with
    ``training=False``) raise no exception.
    """
    players = [
        build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name=label)
        for label in ('ExIt1-test', 'ExIt2-test')
    ]
    Connect4Task.run_episode(players, training=False)