示例#1
0
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            memory_size = 128
            model = PPOModel(env.brains["RealFakeBrain"],
                             use_recurrent=True,
                             m_size=memory_size)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output, model.all_log_probs, model.value, model.entropy,
                model.learning_rate, model.memory_out
            ]
            feed_dict = {
                model.batch_size: 1,
                model.sequence_length: 2,
                model.memory_in: np.zeros((1, memory_size)),
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.epsilon: np.array([[0, 1]])
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
示例#2
0
def test_ppo_model_cc_visual(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')

            model = PPOModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output, model.log_probs, model.value, model.entropy,
                model.learning_rate
            ]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
                model.epsilon: np.array([[0, 1], [2, 3]])
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
示例#3
0
def test_ppo_model_dc_vector_curio(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output, model.all_log_probs, model.value, model.entropy,
                model.learning_rate, model.intrinsic_reward
            ]
            feed_dict = {
                model.batch_size:
                2,
                model.sequence_length:
                1,
                model.vector_in:
                np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
                model.next_vector_in:
                np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
                model.action_holder: [[0], [0]],
                model.action_masks:
                np.ones([2, 2])
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
示例#4
0
def test_step(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    brain_info = env.step([0] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
    with pytest.raises(UnityActionException):
        env.step([0])
    brain_info = env.step([-1] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
    with pytest.raises(UnityActionException):
        env.step([0] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
    env.close()
    assert env.global_done
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
    assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
    assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
    assert len(brain_info['RealFakeBrain'].visual_observations) == brain.number_visual_observations
    assert len(brain_info['RealFakeBrain'].vector_observations) == \
           len(brain_info['RealFakeBrain'].agents)
    assert len(brain_info['RealFakeBrain'].vector_observations[0]) == \
           brain.vector_observation_space_size * brain.num_stacked_vector_observations

    print("\n\n\n\n\n\n\n" + str(brain_info['RealFakeBrain'].local_done))
    assert not brain_info['RealFakeBrain'].local_done[0]
    assert brain_info['RealFakeBrain'].local_done[2]
示例#5
0
def test_initialization(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    with pytest.raises(UnityActionException):
        env.step([0])
    assert env.brain_names[0] == 'RealFakeBrain'
    env.close()
示例#6
0
def test_close(mock_communicator, mock_launcher):
    comm = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = comm
    env = UnityEnvironment(' ')
    assert env._loaded
    env.close()
    assert not env._loaded
    assert comm.has_been_closed
示例#7
0
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    brain_infos = env.reset()
    brain_info = brain_infos[env.brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.brain_names[0]
    trainer_parameters['model_path'] = model_path
    trainer_parameters['keep_checkpoints'] = 3
    policy = PPOPolicy(0, env.brains[env.brain_names[0]], trainer_parameters,
                       False, False)
    run_out = policy.evaluate(brain_info)
    assert run_out['action'].shape == (3, 2)
    env.close()
示例#8
0
def test_reset(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    env.close()
    assert not env.global_done
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
    assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
    assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
    assert len(brain_info['RealFakeBrain'].visual_observations) == brain.number_visual_observations
    assert len(brain_info['RealFakeBrain'].vector_observations) == \
           len(brain_info['RealFakeBrain'].agents)
    assert len(brain_info['RealFakeBrain'].vector_observations[0]) == \
           brain.vector_observation_space_size * brain.num_stacked_vector_observations
示例#9
0
def test_cc_bc_model(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]])
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()