Example #1
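The functions below are shown without their surrounding module; they assume the scaffolding sketched here, based on the ml-agents 0.x test suite (treat the exact module paths and patch targets as assumptions):

import mock
import numpy as np
import pytest
import tensorflow as tf

from mlagents.envs import UnityEnvironment
from mlagents.envs.mock_communicator import MockCommunicator

# Every test is wrapped so that pytest injects the two mock arguments;
# the bottom decorator binds the first parameter:
#
#   @mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
#   @mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
#   def test_something(mock_communicator, mock_launcher):
#       ...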
def test_ppo_model_cc_visual(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')

            model = PPOModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output, model.log_probs, model.value, model.entropy,
                model.learning_rate
            ]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
                model.epsilon: np.array([[0, 1], [2, 3]])
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #2
def test_ppo_model_dc_vector_curio(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output, model.all_log_probs, model.value, model.entropy,
                model.learning_rate, model.intrinsic_reward
            ]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                [3, 4, 5, 3, 4, 5]]),
                model.action_holder: [[0], [0]],
                model.action_masks: np.ones([2, 2])
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #3
def test_initialize_trainers(mock_communicator, mock_launcher, dummy_config,
                             dummy_offline_bc_config, dummy_online_bc_config,
                             dummy_bad_config):
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as _:
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=1)
            tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
                                   1, 1, '', "tests/test_mlagents.trainers.py",
                                   False)

            # Test for PPO trainer
            mock_load.return_value = dummy_config
            config = tc._load_config()
            tf.reset_default_graph()
            tc._initialize_trainers(config)
            assert len(tc.trainers) == 1
            assert isinstance(tc.trainers['RealFakeBrain'], PPOTrainer)

            # Test for Online Behavior Cloning Trainer
            mock_load.return_value = dummy_online_bc_config
            config = tc._load_config()
            tf.reset_default_graph()
            tc._initialize_trainers(config)
            assert isinstance(tc.trainers['RealFakeBrain'], OnlineBCTrainer)

            # Test for proper exception when trainer name is incorrect
            mock_load.return_value = dummy_bad_config
            config = tc._load_config()
            tf.reset_default_graph()
            with pytest.raises(UnityEnvironmentException):
                tc._initialize_trainers(config)
Example #4
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            memory_size = 128
            model = PPOModel(env.brains["RealFakeBrain"],
                             use_recurrent=True,
                             m_size=memory_size)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output, model.all_log_probs, model.value, model.entropy,
                model.learning_rate, model.memory_out
            ]
            feed_dict = {
                model.batch_size: 1,
                model.sequence_length: 2,
                # recurrent state for the single length-2 sequence starts zeroed
                model.memory_in: np.zeros((1, memory_size)),
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.epsilon: np.array([[0, 1]])
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #5
def test_step(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    brain_info = env.step([0] * brain.vector_action_space_size[0] *
                          len(brain_info['RealFakeBrain'].agents))
    with pytest.raises(UnityActionException):
        env.step([0])
    brain_info = env.step([-1] * brain.vector_action_space_size[0] *
                          len(brain_info['RealFakeBrain'].agents))
    # even a correctly-sized action raises once the episode is globally done
    with pytest.raises(UnityActionException):
        env.step([0] * brain.vector_action_space_size[0] *
                 len(brain_info['RealFakeBrain'].agents))
    env.close()
    assert env.global_done
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
    assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
    assert isinstance(brain_info['RealFakeBrain'].vector_observations,
                      np.ndarray)
    assert len(brain_info['RealFakeBrain'].visual_observations) == \
           brain.number_visual_observations
    assert brain_info['RealFakeBrain'].vector_observations.shape[0] == \
           len(brain_info['RealFakeBrain'].agents)
    assert brain_info['RealFakeBrain'].vector_observations.shape[1] == \
           brain.vector_observation_space_size * brain.num_stacked_vector_observations

    print("\n\n\n\n\n\n\n" + str(brain_info['RealFakeBrain'].local_done))
    assert not brain_info['RealFakeBrain'].local_done[0]
    assert brain_info['RealFakeBrain'].local_done[2]
Example #6
def test_close(mock_communicator, mock_launcher):
    comm = MockCommunicator(discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = comm
    env = UnityEnvironment(' ')
    assert env._loaded
    env.close()
    assert not env._loaded
    assert comm.has_been_closed
Example #7
def test_initialization(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    with pytest.raises(UnityActionException):
        env.step([0])
    assert env.brain_names[0] == 'RealFakeBrain'
    env.close()
Example #8
def test_load_config(mock_communicator, mock_launcher, dummy_config):
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as _:
            mock_load.return_value = dummy_config
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=1)
            tc = TrainerController(' ', ' ', 1, None, True, True, False, 1, 1,
                                   1, 1, '', '', False)
            config = tc._load_config()
            assert len(config) == 1
            assert config['default']['trainer'] == "ppo"
Example #9
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    brain_infos = env.reset()
    brain_info = brain_infos[env.brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.brain_names[0]
    trainer_parameters['model_path'] = model_path
    trainer_parameters['keep_checkpoints'] = 3
    policy = PPOPolicy(0, env.brains[env.brain_names[0]], trainer_parameters,
                       False, False)
    run_out = policy.evaluate(brain_info)
    assert run_out['action'].shape == (3, 2)
    env.close()
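Example #9 receives dummy_config as a pytest fixture (Example #12 below calls it as a plain function); neither shows its definition. A minimal sketch of such a fixture, with the keys mirroring the ml-agents PPO trainer parameters and every value an illustrative assumption:

import pytest
import yaml

@pytest.fixture
def dummy_config():
    # Hypothetical PPO trainer parameters; the tests above overwrite
    # model_path and keep_checkpoints themselves before use.
    return yaml.safe_load('''
        trainer: ppo
        batch_size: 32
        buffer_size: 512
        hidden_units: 128
        learning_rate: 3.0e-4
        num_layers: 2
        sequence_length: 64
        memory_size: 8
        use_recurrent: false
        normalize: true
        ''')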
Example #10
def test_cc_bc_model(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #11
def test_multi_agent(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0,
                                                      stack=False,
                                                      num_agents=2)

    # Test for incorrect number of agents.
    with pytest.raises(UnityGymException):
        UnityEnv(' ', multiagent=False)

    env = UnityEnv(' ', use_visual=False, multiagent=True)
    assert isinstance(env.reset(), list)
    actions = [env.action_space.sample() for i in range(env.number_agents)]
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, list)
    assert isinstance(rew, list)
    assert isinstance(done, list)
    assert isinstance(info, dict)
Example #12
def test_ppo_policy_evaluate(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')
        brain_infos = env.reset()
        brain_info = brain_infos[env.brain_names[0]]

        trainer_parameters = dummy_config()
        graph_scope = env.brain_names[0]
        trainer_parameters['graph_scope'] = graph_scope
        policy = PPOPolicy(0, env.brains[env.brain_names[0]],
                           trainer_parameters, sess, False)
        init = tf.global_variables_initializer()
        sess.run(init)
        run_out = policy.evaluate(brain_info)
        assert run_out['action'].shape == (3, 2)
        env.close()
Example #13
def test_reset(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    env.close()
    assert not env.global_done
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
    assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
    assert isinstance(brain_info['RealFakeBrain'].vector_observations,
                      np.ndarray)
    assert len(brain_info['RealFakeBrain'].visual_observations) == \
           brain.number_visual_observations
    assert brain_info['RealFakeBrain'].vector_observations.shape[0] == \
           len(brain_info['RealFakeBrain'].agents)
    assert brain_info['RealFakeBrain'].vector_observations.shape[1] == \
           brain.vector_observation_space_size * brain.num_stacked_vector_observations
Example #14
def test_gym_wrapper(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0,
                                                      stack=False,
                                                      num_agents=1)

    # Test for incorrect number of agents.
    with pytest.raises(UnityGymException):
        UnityEnv(' ', use_visual=False, multiagent=True)

    env = UnityEnv(' ', use_visual=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, bool)
    assert isinstance(info, dict)
Example #15
def test_initialize_offline_trainers(mock_communicator, mock_launcher,
                                     dummy_config, dummy_offline_bc_config,
                                     dummy_online_bc_config, dummy_bad_config):
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as _:
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False,
                stack=False,
                visual_inputs=0,
                brain_name="Ball3DBrain",
                vec_obs_size=8)
            tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
                                   1, 1, '', "tests/test_mlagents.trainers.py",
                                   False)

            # Test for Offline Behavior Cloning Trainer
            mock_load.return_value = dummy_offline_bc_config
            config = tc._load_config()
            tf.reset_default_graph()
            tc._initialize_trainers(config)
            assert isinstance(tc.trainers['Ball3DBrain'], OfflineBCTrainer)
Example #16
def test_initialization(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=True,
                                                      visual_inputs=1)
    tc = TrainerController(' ', ' ', 1, None, True, True, False, 1, 1, 1, 1,
                           '', "tests/test_mlagents.trainers.py", False)
    assert tc.env.brain_names[0] == 'RealFakeBrain'
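Taken together, the examples construct MockCommunicator with a handful of keyword parameters. The view below is reconstructed purely from those calls; the defaults shown are assumptions (Example #5 indexing local_done[2] suggests a three-agent default), not the library's documented API:

# Parameters of MockCommunicator as used across the examples above;
# names come from the calls, defaults are inferred assumptions.
MockCommunicator(
    discrete_action=False,       # discrete vs. continuous action space
    visual_inputs=0,             # number of camera observations on the brain
    stack=True,                  # stack consecutive vector observations
    num_agents=3,                # agents reported by the fake brain
    brain_name='RealFakeBrain',  # name of the single exposed brain
    vec_obs_size=3,              # length of one vector observation
)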