Example #1
0
def test_convert(mock_data, other_format, tensor_type):
    """Converting a rollout switches every field to the target tensor type.

    The rollout is created in `other_format` and converted to the opposite
    format; afterwards each data field must be an instance of `tensor_type`.
    """
    rewards, states, observations, actions, hidden, policy_infos = mock_data

    rollout = StepSequence(
        rewards=rewards,
        observations=observations,
        states=states,
        actions=actions,
        policy_infos=policy_infos,
        hidden=hidden,
        data_format=other_format,
    )

    # Convert to the opposite data format
    if other_format == "numpy":
        rollout.torch()
    elif other_format == "torch":
        rollout.numpy()

    # Every converted field must now be of the expected tensor type
    converted_fields = (
        rollout.rewards,
        rollout.observations,
        rollout.actions,
        rollout.policy_infos["mean"],
        rollout.policy_infos["std"],
        rollout.hidden[0],
    )
    for field in converted_fields:
        assert isinstance(field, tensor_type)

    # The done flags are exempt from conversion and stay a numpy array
    assert isinstance(rollout.done, np.ndarray)
Example #2
0
def test_stepsequence_padding(mock_data, data_format: str,
                              pad_value: Union[int, float], pad_len: int):
    """Pad a rollout by `pad_len` steps and verify the padded content.

    Checks that the padded region of every field equals `pad_value` and
    that all fields end up with the expected lengths. States and
    observations carry one extra final step, so they are one element
    longer than the step-count fields.
    """
    # Create too short rollout
    rewards, states, observations, actions, hidden, policy_infos = mock_data
    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        states=states,
        actions=actions,
        hidden=hidden,
        policy_infos=policy_infos,
    )
    len_orig = ro.length

    if data_format == "torch":
        ro.torch()

    # Pad it
    StepSequence.pad(ro, len_to_pad_to=len(ro) + pad_len, pad_value=pad_value)

    # Check the padded regions. States/observations have the extra final
    # step, so their padding starts at len_orig + 1.
    ro.numpy()  # for simplified checking
    assert np.allclose(ro.states[len_orig + 1:],
                       pad_value * np.ones_like(ro.states[len_orig + 1:]))
    assert np.allclose(
        ro.observations[len_orig + 1:],
        pad_value * np.ones_like(ro.observations[len_orig + 1:]))
    assert np.allclose(ro.actions[len_orig:],
                       pad_value * np.ones_like(ro.actions[len_orig:]))
    assert np.allclose(ro.rewards[len_orig:],
                       pad_value * np.ones_like(ro.rewards[len_orig:]))
    for k, v in ro.policy_infos.items():
        assert np.allclose(v[len_orig:],
                           pad_value * np.ones_like(v[len_orig:]))

    assert ro.length == len_orig + pad_len
    assert all(ro.rollout_bounds == np.array([0, len_orig + pad_len]))

    # FIX: was hard-coded `len_orig + 8`, which only holds for pad_len == 7;
    # the correct expectation is pad_len plus the one final step.
    assert len(ro.states) == len_orig + pad_len + 1  # check for final step
    assert len(ro.observations) == len_orig + pad_len + 1  # check for final step
    assert len(ro.actions) == len_orig + pad_len
    assert len(ro.rewards) == len_orig + pad_len
    for h in ro.hidden:
        assert len(h) == len_orig + pad_len
Example #3
0
def test_convert(other_format, tensor_type):
    """After a format conversion, every rollout field has the target type."""
    ro = StepSequence(
        rewards=rewards,
        observations=observations,
        actions=actions,
        policy_infos=policy_infos,
        hidden=hidden,
        data_format=other_format,
    )

    # Switch the rollout to the other data format
    if other_format == 'numpy':
        ro.torch()
    elif other_format == 'torch':
        ro.numpy()

    # Every converted attribute must be an instance of the expected type
    items_to_check = [
        ro.rewards,
        ro.observations,
        ro.actions,
        ro.policy_infos['mean'],
        ro.policy_infos['std'],
        ro.hidden[0],
    ]
    for item in items_to_check:
        assert isinstance(item, tensor_type)

    # The done flags are never converted and remain a numpy array
    assert isinstance(ro.done, np.ndarray)