Example #1
import numpy as np

from mlagents.trainers.buffer import BufferKey, RewardSignalUtil
from mlagents.trainers.tests import mock_brain as mb


# copy_buffer_fields, poca_dummy_config, create_test_poca_optimizer,
# BUFFER_INIT_SAMPLES and the gail/dummy config fixtures are helpers defined
# elsewhere in the ml-agents test suite; they are not shown in this excerpt.
def test_poca_optimizer_update_gail(gail_dummy_config,
                                    dummy_config):  # noqa: F811
    # Test evaluate
    dummy_config.reward_signals = gail_dummy_config
    config = poca_dummy_config()
    optimizer = create_test_poca_optimizer(config,
                                           use_rnn=False,
                                           use_discrete=False,
                                           use_visual=False)
    # Test update
    update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES,
                                        optimizer.policy.behavior_spec)
    # Mock out reward signal eval
    copy_buffer_fields(
        update_buffer,
        src_key=BufferKey.ENVIRONMENT_REWARDS,
        dst_keys=[
            BufferKey.ADVANTAGES,
            RewardSignalUtil.returns_key("extrinsic"),
            RewardSignalUtil.value_estimates_key("extrinsic"),
            RewardSignalUtil.baseline_estimates_key("extrinsic"),
            RewardSignalUtil.returns_key("gail"),
            RewardSignalUtil.value_estimates_key("gail"),
            RewardSignalUtil.baseline_estimates_key("gail"),
        ],
    )

    update_buffer[BufferKey.CONTINUOUS_LOG_PROBS] = np.ones_like(
        update_buffer[BufferKey.CONTINUOUS_ACTION])
    optimizer.update(
        update_buffer,
        num_sequences=update_buffer.num_experiences
        // optimizer.policy.sequence_length,
    )

    # Check if buffer size is too big
    update_buffer = mb.simulate_rollout(3000, optimizer.policy.behavior_spec)
    # Mock out reward signal eval
    copy_buffer_fields(
        update_buffer,
        src_key=BufferKey.ENVIRONMENT_REWARDS,
        dst_keys=[
            BufferKey.ADVANTAGES,
            RewardSignalUtil.returns_key("extrinsic"),
            RewardSignalUtil.value_estimates_key("extrinsic"),
            RewardSignalUtil.baseline_estimates_key("extrinsic"),
            RewardSignalUtil.returns_key("gail"),
            RewardSignalUtil.value_estimates_key("gail"),
            RewardSignalUtil.baseline_estimates_key("gail"),
        ],
    )
    optimizer.update(
        update_buffer,
        num_sequences=update_buffer.num_experiences
        // optimizer.policy.sequence_length,
    )
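
copy_buffer_fields above is a helper from the ml-agents test utilities and is not shown in this excerpt. A minimal sketch of its assumed behavior, for readers following the mocking step, could look like this (illustrative name, not the actual implementation):

# Illustrative sketch only (assumes the buffer behaves like a mapping from
# buffer keys to per-step value lists; the real ml-agents helper may differ).
def copy_buffer_fields_sketch(update_buffer, src_key, dst_keys):
    # Fill every destination field with a copy of the values already stored
    # under src_key, so each mocked reward-signal stream sees identical data.
    for dst_key in dst_keys:
        update_buffer[dst_key] = list(update_buffer[src_key])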
Example #2
@pytest.fixture
def dummy_config():
    # Pytest fixture: supplies the default POCA trainer settings to tests.
    return poca_dummy_config()
Example #3
import attr
import pytest

# MultiAgentEnvironment is assumed to live with the other simple test envs.
from mlagents.trainers.tests.simple_test_envs import MultiAgentEnvironment
from mlagents.trainers.tests.dummy_config import (
    ppo_dummy_config,
    sac_dummy_config,
    poca_dummy_config,
)
from mlagents.trainers.tests.check_env_trains import (
    check_environment_trains,
    default_reward_processor,
)

BRAIN_NAME = "1D"

PPO_TORCH_CONFIG = ppo_dummy_config()
SAC_TORCH_CONFIG = sac_dummy_config()
POCA_TORCH_CONFIG = poca_dummy_config()

# Tests in this file are not run on GPU machines
pytestmark = pytest.mark.check_environment_trains


@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_poca(action_sizes):
    env = MultiAgentEnvironment([BRAIN_NAME],
                                action_sizes=action_sizes,
                                num_agents=2)
    config = attr.evolve(POCA_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config})


@pytest.mark.parametrize("num_visual", [1, 2])
Example #4
def poca_config():
    # Builds a minimal RunOptions with one POCA behavior named "test_brain".
    return RunOptions(behaviors={"test_brain": poca_dummy_config()})
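
For context, a hypothetical test consuming this helper might only check that the POCA settings were registered under the expected behavior name. The test below is illustrative and not part of the original file:

def test_poca_config_contains_behavior():
    # Hypothetical usage (not from the original suite): the POCA settings
    # should be stored under the behavior name used when building RunOptions.
    run_options = poca_config()
    assert "test_brain" in run_options.behaviors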