示例#1
0
def test_stepsequence_from_pandas(mock_data, given_rewards: bool):
    """
    Check that `StepSequence.from_pandas` rebuilds states, observations, and actions from a data frame.

    The data frame columns are named after the labels of the environment's spaces and are inserted in a
    deliberately shuffled order.

    :param mock_data: tuple unpacked as (rewards, states, observations, actions, hidden, policy_infos)
    :param given_rewards: if `True`, a "rewards" column is added to the data frame
    """
    rewards, states, observations, actions, _hidden, _policy_infos = mock_data

    # Bring everything into numpy so that the columns can be sliced out
    state_arr = np.asarray(states)
    obs_arr = np.asarray(observations)
    act_arr = to.stack(actions).numpy()
    rew_arr = np.asarray(rewards)

    # Fake observed data set. The column names must match the labels of the spaces, the order may be mixed.
    columns = {
        "s0": state_arr[:, 0],
        "s1": state_arr[:, 1],
        "s2": state_arr[:, 2],
        "o3": obs_arr[:, 3],
        "o0": obs_arr[:, 0],
        "o2": obs_arr[:, 2],
        "o1": obs_arr[:, 1],
        "a1": act_arr[:, 1],
        "a0": act_arr[:, 0],
        # Extra columns whose names do not appear among the labels of any space
        "steps": np.arange(state_arr.shape[0]),
        "infos": [{"foo": "bar"}] * 6,
    }
    if given_rewards:
        columns["rewards"] = rew_arr

    # Every column is wrapped in its own Series (presumably so that columns of unequal length can coexist
    # in one frame -- TODO confirm against the fixture)
    df = pd.DataFrame({label: pd.Series(values) for label, values in columns.items()})

    state_space = InfBoxSpace(shape=state_arr[0].shape, labels=["s0", "s1", "s2"])
    obs_space = InfBoxSpace(shape=obs_arr[0].shape, labels=["o0", "o1", "o2", "o3"])
    act_space = InfBoxSpace(shape=act_arr[0].shape, labels=["a0", "a1"])
    env = MockEnv(state_space=state_space, obs_space=obs_space, act_space=act_space)

    rebuilt = StepSequence.from_pandas(df, env.spec)

    # The number of rewards is checked in both cases, i.e. with and without a "rewards" column
    assert len(rebuilt.rewards) == len(rew_arr)
    assert np.allclose(rebuilt.states, state_arr)
    assert np.allclose(rebuilt.observations, obs_arr)
    assert np.allclose(rebuilt.actions, act_arr)
示例#2
0
def test_reset():
    """
    Check that resetting a `DownsamplingWrapper` clears its stored action and its step counter.

    With `factor=2`, the assertions expect the wrapped environment to receive a newly commanded action on
    every second step, and the previously forwarded one in between.
    """
    inner_env = MockEnv(
        act_space=BoxSpace(-1, 1, shape=(2, )),
        obs_space=BoxSpace(-1, 1, shape=(2, )),
    )
    downsampled_env = DownsamplingWrapper(inner_env, factor=2)

    def step_and_check(commanded, forwarded):
        # Step the wrapper, then compare with the action that reached the wrapped environment
        downsampled_env.step(np.array(commanded))
        assert inner_env.last_act == forwarded

    # Perform some actions
    step_and_check([0, 4], [0, 4])
    step_and_check([4, 4], [0, 4])
    step_and_check([4, 4], [4, 4])

    # The next forwarded action would be [4, 4] again, but now we reset
    downsampled_env.reset()
    assert downsampled_env._act_last is None
    assert downsampled_env._cnt == 0

    # After the reset, the first commanded action is forwarded right away and then held
    step_and_check([1, 2], [1, 2])
    step_and_check([2, 3], [1, 2])
示例#3
0
def test_domain_param():
    """
    Check that the delay of an `ActDelayWrapper` can be changed through its `domain_param` attribute.

    The wrapper starts with a delay of 1 step. After assigning `act_delay = 2` and resetting, the assertions
    expect the commanded actions to reach the wrapped environment 2 steps late, with `[0, 0]` before that.
    """
    inner_env = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )))
    delayed_env = ActDelayWrapper(inner_env, delay=1)

    def step_and_check(commanded, forwarded):
        # Step the wrapper, then compare with the action that reached the wrapped environment
        delayed_env.step(np.array(commanded))
        assert inner_env.last_act == forwarded

    # Reset to initialize buffer
    delayed_env.reset()

    # Delay of 1: the wrapped environment lags one step behind
    step_and_check([0, 1], [0, 0])
    step_and_check([2, 4], [0, 1])

    # Change the delay and reset
    delayed_env.domain_param = {'act_delay': 2}
    delayed_env.reset()

    # Delay of 2: the first commanded action arrives on the third step
    step_and_check([1, 2], [0, 0])
    step_and_check([2, 3], [0, 0])
    step_and_check([8, 9], [1, 2])
示例#4
0
def test_indi_nonlin_layer(in_features, same_nonlin, bias, weight):
    """
    Check that an `IndiNonlinLayer` is a torch module and maps a tensor to a tensor of the same shape.

    :param in_features: number of input features, also used as the length of the random input
    :param same_nonlin: if falsy and there is more than one feature, one `tanh` per feature is passed,
                        otherwise a single `sigmoid` is passed
    :param bias: forwarded unchanged to the `IndiNonlinLayer` constructor
    :param weight: forwarded unchanged to the `IndiNonlinLayer` constructor
    """
    # A list of nonlinearities is only built when there are several features to assign them to
    one_per_feature = not same_nonlin and in_features > 1
    nonlin = in_features*[to.tanh] if one_per_feature else to.sigmoid

    layer = IndiNonlinLayer(in_features, nonlin, bias, weight)
    assert isinstance(layer, nn.Module)

    inp = to.randn(in_features)
    out = layer(inp)
    assert isinstance(out, to.Tensor)
    assert inp.shape == out.shape


@pytest.mark.parametrize(
    'env', [MockEnv(obs_space=InfBoxSpace(shape=1), act_space=InfBoxSpace(shape=1))]
)
@pytest.mark.parametrize(
    'policy', [
        # Two-headed policies are not supported
        'rnn_policy',
        'lstm_policy',
        'gru_policy',
        'adn_policy',
        'nf_policy',
    ],
    ids=['rnn', 'lstm', 'gru', 'adn', 'nf'],
    indirect=True
)
@pytest.mark.parametrize('windowed', [True, False],
                         ids=['windowed', 'not_windowed'])