Example #1
def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    fake_action_outputs = {
        "action": ActionTuple(continuous=np.array([[0.1], [0.1]])),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=np.array([[0.1], [0.1]])),
    }
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=2,
        sensor_specs=create_sensor_specs_with_shapes([(8, )] + num_vis_obs *
                                                     [(84, 84, 3)]),
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = ActionInfo(
        action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
        env_action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
        value=[0.1, 0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_decision_steps.agent_id,
    )
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(mock_decision_steps, mock_terminal_steps, 0,
                              ActionInfo.empty())
    for _ in range(5):
        processor.add_experiences(mock_decision_steps, mock_terminal_steps, 0,
                                  fake_action_info)

    # Assert that two trajectories have been added to the Trainer
    assert len(tqueue.put.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = tqueue.put.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5

    # Assert that the AgentProcessor is empty
    assert len(processor.experience_buffers[0]) == 0

    # Test empty steps
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=0,
        sensor_specs=create_sensor_specs_with_shapes([(8, )] + num_vis_obs *
                                                     [(84, 84, 3)]),
        action_spec=ActionSpec.create_continuous(2),
    )
    processor.add_experiences(mock_decision_steps, mock_terminal_steps, 0,
                              ActionInfo.empty())
    # Assert that the AgentProcessor is still empty
    assert len(processor.experience_buffers[0]) == 0
Example #2
def test_create_inputs(encoder_type, normalize, num_vector, num_visual):
    vec_obs_shape = (5, )
    vis_obs_shape = (84, 84, 3)
    obs_shapes = []
    for _ in range(num_vector):
        obs_shapes.append(vec_obs_shape)
    for _ in range(num_visual):
        obs_shapes.append(vis_obs_shape)
    h_size = 128
    sen_spec = create_sensor_specs_with_shapes(obs_shapes)
    encoders, embedding_sizes = ModelUtils.create_input_processors(
        sen_spec, h_size, encoder_type, normalize)
    total_output = sum(embedding_sizes)
    vec_enc = []
    vis_enc = []
    for i, enc in enumerate(encoders):
        if len(obs_shapes[i]) == 1:
            vec_enc.append(enc)
        else:
            vis_enc.append(enc)
    assert len(vec_enc) == num_vector
    assert len(vis_enc) == num_visual
    assert total_output == int(num_visual * h_size +
                               vec_obs_shape[0] * num_vector)
    if num_vector > 0:
        assert isinstance(vec_enc[0], VectorInput)

    for enc in vis_enc:
        assert isinstance(enc, ModelUtils.get_encoder_for_type(encoder_type))
Example #3
def test_valuenetwork():
    torch.manual_seed(0)
    obs_size = 4
    num_outputs = 2
    network_settings = NetworkSettings()
    sen_spec = create_sensor_specs_with_shapes([(obs_size, )])

    stream_names = [f"stream_name{n}" for n in range(4)]
    value_net = ValueNetwork(stream_names,
                             sen_spec,
                             network_settings,
                             outputs_per_stream=num_outputs)
    optimizer = torch.optim.Adam(value_net.parameters(), lr=3e-3)

    for _ in range(50):
        sample_obs = torch.ones((1, obs_size))
        values, _ = value_net([sample_obs])
        loss = 0
        for s_name in stream_names:
            assert values[s_name].shape == (1, num_outputs)
            # Try to force output to 1
            loss += torch.nn.functional.mse_loss(values[s_name],
                                                 torch.ones((1, num_outputs)))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for value in values.values():
        for _out in value:
            assert _out[0] == pytest.approx(1.0, abs=0.1)
Example #4
def test_trajectory_to_agentbuffer():
    length = 15
    wanted_keys = [
        "next_obs_0",
        "next_obs_1",
        "obs_0",
        "obs_1",
        "memory",
        "masks",
        "done",
        "continuous_action",
        "discrete_action",
        "continuous_log_probs",
        "discrete_log_probs",
        "action_mask",
        "prev_action",
        "environment_rewards",
    ]
    wanted_keys = set(wanted_keys)
    trajectory = make_fake_trajectory(
        length=length,
        sensor_specs=create_sensor_specs_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
        action_spec=ActionSpec.create_continuous(ACTION_SIZE),
    )
    agentbuffer = trajectory.to_agentbuffer()
    seen_keys = set()
    for key, field in agentbuffer.items():
        assert len(field) == length
        seen_keys.add(key)

    assert seen_keys == wanted_keys
Example #5
def test_networkbody_visual():
    torch.manual_seed(0)
    vec_obs_size = 4
    obs_size = (84, 84, 3)
    network_settings = NetworkSettings()
    obs_shapes = [(vec_obs_size, ), obs_size]

    networkbody = NetworkBody(create_sensor_specs_with_shapes(obs_shapes),
                              network_settings)
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = 0.1 * torch.ones((1, 84, 84, 3))
    sample_vec_obs = torch.ones((1, vec_obs_size))
    obs = [sample_vec_obs] + [sample_obs]

    for _ in range(150):
        encoded, _ = networkbody(obs)
        assert encoded.shape == (1, network_settings.hidden_units)
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten():
        assert _enc == pytest.approx(1.0, abs=0.1)
Example #6
def create_mock_group_spec(
    number_visual_observations=0,
    vector_action_space_type="continuous",
    vector_observation_space_size=3,
    vector_action_space_size=None,
):
    """
    Creates a mock BrainParameters object with parameters.
    """
    # Avoid using mutable object as default param
    if vector_action_space_type == "continuous":
        if vector_action_space_size is None:
            vector_action_space_size = 2
        else:
            vector_action_space_size = vector_action_space_size[0]
        action_spec = ActionSpec.create_continuous(vector_action_space_size)
    else:
        if vector_action_space_size is None:
            vector_action_space_size = (2, )
        else:
            vector_action_space_size = tuple(vector_action_space_size)
        action_spec = ActionSpec.create_discrete(vector_action_space_size)
    obs_shapes = [(vector_observation_space_size, )]
    for _ in range(number_visual_observations):
        obs_shapes += [(8, 8, 3)]
    sen_spec = create_sensor_specs_with_shapes(obs_shapes)
    return BehaviorSpec(sen_spec, action_spec)
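
For reference, a hypothetical call to the factory above (the argument values are illustrative and not taken from the original tests; the assertions assume BehaviorSpec exposes sensor_specs and action_spec fields, as in the later snippets):

spec = create_mock_group_spec(
    number_visual_observations=1,
    vector_action_space_type="discrete",
    vector_action_space_size=[3, 2],
)
assert len(spec.sensor_specs) == 2  # one vector + one visual observation
assert spec.action_spec.discrete_branches == (3, 2)
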
Example #7
def test_batched_step_result_from_proto():
    n_agents = 10
    shapes = [(3, ), (4, )]
    spec = BehaviorSpec(create_sensor_specs_with_shapes(shapes),
                        ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
    for agent_id in range(n_agents):
        if agent_id in decision_steps:
            # we set the reward equal to the agent id in generate_list_agent_proto
            assert decision_steps[agent_id].reward == agent_id
        elif agent_id in terminal_steps:
            assert terminal_steps[agent_id].reward == agent_id
        else:
            raise Exception("Missing agent from the steps")
    # We sort the AgentId since they are split between DecisionSteps and TerminalSteps
    combined_agent_id = list(decision_steps.agent_id) + list(
        terminal_steps.agent_id)
    combined_agent_id.sort()
    assert combined_agent_id == list(range(n_agents))
    for agent_id in range(n_agents):
        assert (agent_id in terminal_steps) == (agent_id % 2 == 0)
        if agent_id in terminal_steps:
            assert terminal_steps[agent_id].interrupted == (agent_id % 4 == 0)
    assert decision_steps.obs[0].shape[1] == shapes[0][0]
    assert decision_steps.obs[1].shape[1] == shapes[1][0]
    assert terminal_steps.obs[0].shape[1] == shapes[0][0]
    assert terminal_steps.obs[1].shape[1] == shapes[1][0]
Example #8
def _make_sensor_specs(self) -> SensorSpec:
    obs_shape: List[Any] = []
    for _ in range(self.num_vector):
        obs_shape.append((self.vec_obs_size, ))
    for _ in range(self.num_visual):
        obs_shape.append(self.vis_obs_size)
    sen_spec = create_sensor_specs_with_shapes(obs_shape)
    return sen_spec
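
Every snippet on this page builds its observation specs through the same create_sensor_specs_with_shapes helper from mlagents.trainers.tests.dummy_config. As a rough orientation, a minimal sketch of such a helper is shown below; it assumes SensorSpec is the (shape, dimension_property) tuple from mlagents_envs.base_env and is not copied from the actual source:

from typing import List, Tuple
from mlagents_envs.base_env import DimensionProperty, SensorSpec

def create_sensor_specs_with_shapes(shapes: List[Tuple[int, ...]]) -> List[SensorSpec]:
    # One SensorSpec per observation shape, with every dimension left unspecified.
    sen_spec: List[SensorSpec] = []
    for shape in shapes:
        dim_prop = (DimensionProperty.UNSPECIFIED,) * len(shape)
        sen_spec.append(SensorSpec(shape, dim_prop))
    return sen_spec
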
Example #9
def test_empty_terminal_steps():
    specs = BehaviorSpec(
        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5, )]),
        action_spec=ActionSpec.create_continuous(3),
    )
    ts = TerminalSteps.empty(specs)
    assert len(ts.obs) == 2
    assert ts.obs[0].shape == (0, 3, 2)
    assert ts.obs[1].shape == (0, 5)
Example #10
def test_action_masking_continuous():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(create_sensor_specs_with_shapes(shapes),
                                 ActionSpec.create_continuous(10))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
    masks = decision_steps.action_mask
    assert masks is None
Example #11
def test_empty_decision_steps():
    specs = BehaviorSpec(
        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5, )]),
        action_spec=ActionSpec.create_continuous(3),
    )
    ds = DecisionSteps.empty(specs)
    assert len(ds.obs) == 2
    assert ds.obs[0].shape == (0, 3, 2)
    assert ds.obs[1].shape == (0, 5)
Example #12
def test_actor_critic(ac_type, lstm):
    obs_size = 4
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings() if lstm else None,
        normalize=True)
    sen_spec = create_sensor_specs_with_shapes([(obs_size, )])
    act_size = 2
    mask = torch.ones([1, act_size * 2])
    stream_names = [f"stream_name{n}" for n in range(4)]
    # action_spec = ActionSpec.create_continuous(act_size[0])
    action_spec = ActionSpec(act_size,
                             tuple(act_size for _ in range(act_size)))
    actor = ac_type(sen_spec, network_settings, action_spec, stream_names)
    if lstm:
        sample_obs = torch.ones(
            (1, network_settings.memory.sequence_length, obs_size))
        memories = torch.ones(
            (1, network_settings.memory.sequence_length, actor.memory_size))
    else:
        sample_obs = torch.ones((1, obs_size))
        memories = torch.tensor([])
        # memories isn't always set to None; the network should be able to
        # handle that.
    # Test critic pass
    value_out, memories_out = actor.critic_pass([sample_obs],
                                                memories=memories)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
            assert memories_out.shape == memories.shape
        else:
            assert value_out[stream].shape == (1, )

    # Test get_action_stats_and_value
    action, log_probs, entropies, value_out, mem_out = actor.get_action_stats_and_value(
        [sample_obs], memories=memories, masks=mask)
    if lstm:
        assert action.continuous_tensor.shape == (64, 2)
    else:
        assert action.continuous_tensor.shape == (1, 2)

    assert len(action.discrete_list) == 2
    for _disc in action.discrete_list:
        if lstm:
            assert _disc.shape == (64, 1)
        else:
            assert _disc.shape == (1, 1)

    if mem_out is not None:
        assert mem_out.shape == memories.shape
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
        else:
            assert value_out[stream].shape == (1, )
Example #13
def test_batched_step_result_from_proto_raises_on_nan():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(create_sensor_specs_with_shapes(shapes),
                                 ActionSpec.create_continuous(3))
    ap_list = generate_list_agent_proto(n_agents,
                                        shapes,
                                        nan_observations=True)
    with pytest.raises(RuntimeError):
        steps_from_proto(ap_list, behavior_spec)
Example #14
def test_end_episode():
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )
    fake_action_outputs = {
        "action": ActionTuple(continuous=np.array([[0.1]])),
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
    }

    mock_decision_step, mock_terminal_step = mb.create_mock_steps(
        num_agents=1,
        sensor_specs=create_sensor_specs_with_shapes([(8,)]),
        action_spec=ActionSpec.create_continuous(2),
    )
    fake_action_info = ActionInfo(
        action=ActionTuple(continuous=np.array([[0.1]])),
        env_action=ActionTuple(continuous=np.array([[0.1]])),
        value=[0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_decision_step.agent_id,
    )

    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
        mock_decision_step, mock_terminal_step, 0, ActionInfo.empty()
    )
    # Run 3 trajectories, with different workers (to simulate different agents)
    remove_calls = []
    for _ep in range(3):
        remove_calls.append(mock.call([get_global_agent_id(_ep, 0)]))
        for _ in range(5):
            processor.add_experiences(
                mock_decision_step, mock_terminal_step, _ep, fake_action_info
            )
            # Make sure we don't add experiences from the prior agents after they are done

    # Call end episode
    processor.end_episode()
    # Check that we removed every agent
    policy.remove_previous_action.assert_has_calls(remove_calls)
    # Check that there are no experiences left
    assert len(processor.experience_buffers.keys()) == 0
    assert len(processor.last_take_action_outputs.keys()) == 0
    assert len(processor.episode_steps.keys()) == 0
    assert len(processor.episode_rewards.keys()) == 0
Example #15
def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary):
    trainer = create_rl_trainer()
    mock_policy = mock.Mock()
    trainer.add_policy("TestBrain", mock_policy)
    trajectory_queue = AgentManagerQueue("testbrain")
    policy_queue = AgentManagerQueue("testbrain")
    trainer.subscribe_trajectory_queue(trajectory_queue)
    trainer.publish_policy_queue(policy_queue)
    time_horizon = 10
    summary_freq = trainer.trainer_settings.summary_freq
    checkpoint_interval = trainer.trainer_settings.checkpoint_interval
    trajectory = mb.make_fake_trajectory(
        length=time_horizon,
        sensor_specs=create_sensor_specs_with_shapes([(1, )]),
        max_step_complete=True,
        action_spec=ActionSpec.create_discrete((2, )),
    )
    # Feed several trajectories to the trainer and advance it
    num_trajectories = 5
    for _ in range(0, num_trajectories):
        trajectory_queue.put(trajectory)
        trainer.advance()
        # Check that there is stuff in the policy queue
        policy_queue.get_nowait()

    # Check that we have called write_summary the appropriate number of times
    calls = [
        mock.call(step) for step in range(summary_freq, num_trajectories *
                                          time_horizon, summary_freq)
    ]
    mock_write_summary.assert_has_calls(calls, any_order=True)

    checkpoint_range = range(checkpoint_interval,
                             num_trajectories * time_horizon,
                             checkpoint_interval)
    calls = [mock.call(trainer.brain_name, step) for step in checkpoint_range]

    trainer.model_saver.save_checkpoint.assert_has_calls(calls, any_order=True)
    export_ext = "onnx"

    add_checkpoint_calls = [
        mock.call(
            trainer.brain_name,
            ModelCheckpoint(
                step,
                f"{trainer.model_saver.model_path}{os.path.sep}{trainer.brain_name}-{step}.{export_ext}",
                None,
                mock.ANY,
            ),
            trainer.trainer_settings.keep_checkpoints,
        ) for step in checkpoint_range
    ]
    mock_add_checkpoint.assert_has_calls(add_checkpoint_calls)
Example #16
def test_action_masking_discrete_1():
    n_agents = 10
    shapes = [(3, ), (4, )]
    behavior_spec = BehaviorSpec(create_sensor_specs_with_shapes(shapes),
                                 ActionSpec.create_discrete((10, )))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
    masks = decision_steps.action_mask
    assert isinstance(masks, list)
    assert len(masks) == 1
    assert masks[0].shape == (n_agents / 2, 10)
    assert masks[0][0, 0]
Example #17
def setup_test_behavior_specs(use_discrete=True,
                              use_visual=False,
                              vector_action_space=2,
                              vector_obs_space=8):
    if use_discrete:
        action_spec = ActionSpec.create_discrete(tuple(vector_action_space))
    else:
        action_spec = ActionSpec.create_continuous(vector_action_space)
    observation_shapes = [(84, 84, 3)] * int(use_visual) + [
        (vector_obs_space, )
    ]
    sen_spec = create_sensor_specs_with_shapes(observation_shapes)
    behavior_spec = BehaviorSpec(sen_spec, action_spec)
    return behavior_spec
Example #18
def test_process_trajectory(dummy_config):
    mock_specs = mb.setup_test_behavior_specs(
        True, False, vector_action_space=[2], vector_obs_space=1
    )
    behavior_id_team0 = "test_brain?team=0"
    behavior_id_team1 = "test_brain?team=1"
    brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name

    ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
    controller = GhostController(100)
    trainer = GhostTrainer(
        ppo_trainer, brain_name, controller, 0, dummy_config, True, "0"
    )

    # first policy encountered becomes policy trained by wrapped PPO
    parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
    policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
    trainer.add_policy(parsed_behavior_id0, policy)
    trajectory_queue0 = AgentManagerQueue(behavior_id_team0)
    trainer.subscribe_trajectory_queue(trajectory_queue0)

    # The ghost trainer should ignore this queue because it is off-policy
    parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
    policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
    trainer.add_policy(parsed_behavior_id1, policy)
    trajectory_queue1 = AgentManagerQueue(behavior_id_team1)
    trainer.subscribe_trajectory_queue(trajectory_queue1)

    time_horizon = 15
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        sensor_specs=create_sensor_specs_with_shapes([(1,)]),
        action_spec=mock_specs.action_spec,
    )
    trajectory_queue0.put(trajectory)
    trainer.advance()

    # Check that trainer put trajectory in update buffer
    assert trainer.trainer.update_buffer.num_experiences == 15

    trajectory_queue1.put(trajectory)
    trainer.advance()

    # Check that ghost trainer ignored off policy queue
    assert trainer.trainer.update_buffer.num_experiences == 15
    # Check that it emptied the queue
    assert trajectory_queue1.empty()
Example #19
def test_advance(mocked_clear_update_buffer, mocked_save_model):
    trainer = create_rl_trainer()
    mock_policy = mock.Mock()
    trainer.add_policy("TestBrain", mock_policy)
    trajectory_queue = AgentManagerQueue("testbrain")
    policy_queue = AgentManagerQueue("testbrain")
    trainer.subscribe_trajectory_queue(trajectory_queue)
    trainer.publish_policy_queue(policy_queue)
    time_horizon = 10
    trajectory = mb.make_fake_trajectory(
        length=time_horizon,
        sensor_specs=create_sensor_specs_with_shapes([(1, )]),
        max_step_complete=True,
        action_spec=ActionSpec.create_discrete((2, )),
    )
    trajectory_queue.put(trajectory)

    trainer.advance()
    policy_queue.get_nowait()
    # Check that get_step is correct
    assert trainer.get_step == time_horizon
    # Check that we can turn off the trainer and that the buffer is cleared
    for _ in range(0, 5):
        trajectory_queue.put(trajectory)
        trainer.advance()
        # Check that there is stuff in the policy queue
        policy_queue.get_nowait()

    # Check that if the policy doesn't update, we don't push it to the queue
    trainer.set_is_policy_updating(False)
    for _ in range(0, 10):
        trajectory_queue.put(trajectory)
        trainer.advance()
        # Check that there is nothing in the policy queue
        with pytest.raises(AgentManagerQueue.Empty):
            policy_queue.get_nowait()

    # Check that the buffer has been cleared
    assert not trainer.should_still_train
    assert mocked_clear_update_buffer.call_count > 0
    assert mocked_save_model.call_count == 0
Example #20
def test_networkbody_lstm():
    torch.manual_seed(0)
    obs_size = 4
    seq_len = 16
    network_settings = NetworkSettings(memory=NetworkSettings.MemorySettings(
        sequence_length=seq_len, memory_size=12))
    obs_shapes = [(obs_size, )]

    networkbody = NetworkBody(create_sensor_specs_with_shapes(obs_shapes),
                              network_settings)
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
    sample_obs = torch.ones((1, seq_len, obs_size))

    for _ in range(200):
        encoded, _ = networkbody([sample_obs],
                                 memories=torch.ones(1, seq_len, 12))
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten():
        assert _enc == pytest.approx(1.0, abs=0.1)
Example #21
CONTINUOUS_PATH = (os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                os.pardir, os.pardir) + "/test.demo")
DISCRETE_PATH = (os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              os.pardir, os.pardir) + "/testdcvis.demo")
SEED = [42]

ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(2)
ACTIONSPEC_FOURDISCRETE = ActionSpec.create_discrete((2, 3, 3, 3))
ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((20, ))


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec(create_sensor_specs_with_shapes([(8, )]),
                     ACTIONSPEC_CONTINUOUS)
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
    gail_rp = GAILRewardProvider(behavior_spec, gail_settings)
    assert gail_rp.name == "GAIL"


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec(create_sensor_specs_with_shapes([(8, )]),
                     ACTIONSPEC_CONTINUOUS)
    ],
Example #22
    create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer, )
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes

ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)
ACTIONSPEC_TWODISCRETE = ActionSpec.create_discrete((2, 3))


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec(create_sensor_specs_with_shapes([(10, )]),
                     ACTIONSPEC_CONTINUOUS),
        BehaviorSpec(create_sensor_specs_with_shapes([(10, )]),
                     ACTIONSPEC_TWODISCRETE),
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    settings = RewardSignalSettings()
    settings.gamma = 0.2
    extrinsic_rp = ExtrinsicRewardProvider(behavior_spec, settings)
    assert extrinsic_rp.gamma == 0.2
    assert extrinsic_rp.name == "Extrinsic"


@pytest.mark.parametrize(
    "behavior_spec",
Example #23
from mlagents.trainers.settings import RNDSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer, )
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes

SEED = [42]

ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)
ACTIONSPEC_TWODISCRETE = ActionSpec.create_discrete((2, 3))
ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((2, ))


@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec(create_sensor_specs_with_shapes([(10, )]),
                     ACTIONSPEC_CONTINUOUS),
        BehaviorSpec(create_sensor_specs_with_shapes([(10, )]),
                     ACTIONSPEC_TWODISCRETE),
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    curiosity_settings = RNDSettings(32, 0.01)
    curiosity_settings.strength = 0.1
    curiosity_rp = RNDRewardProvider(behavior_spec, curiosity_settings)
    assert curiosity_rp.strength == 0.1
    assert curiosity_rp.name == "RND"


@pytest.mark.parametrize(
    "behavior_spec",