def test_stepsequence_from_pandas(mock_data, given_rewards: bool):
    """
    Check that `StepSequence.from_pandas` rebuilds states, observations, and actions from a data frame whose
    column names are the labels of the environment's spaces.

    :param mock_data: tuple of (rewards, states, observations, actions, hidden, policy_infos); only the first
                      four entries are used here
    :param given_rewards: if `True`, a `rewards` column is added to the data frame
    """
    rewards, states, observations, actions, _, _ = mock_data

    states = np.asarray(states)
    observations = np.asarray(observations)
    actions = to.stack(actions).numpy()
    rewards = np.asarray(rewards)

    # Create fake observed data set. The labels must match the labels of the spaces. The order can be mixed.
    content = dict(
        s0=states[:, 0],
        s1=states[:, 1],
        s2=states[:, 2],
        o3=observations[:, 3],
        o0=observations[:, 0],
        o2=observations[:, 2],
        o1=observations[:, 1],
        a1=actions[:, 1],
        a0=actions[:, 0],
        # Some content that does not correspond to any label of the spaces
        steps=np.arange(0, states.shape[0]),
        infos=[dict(foo="bar")] * 6,
    )
    if given_rewards:
        content["rewards"] = rewards

    # Every column is wrapped in a pd.Series before building the frame
    # (presumably so that columns of unequal length are aligned by pandas instead of raising; verify)
    df = pd.DataFrame({label: pd.Series(column) for label, column in content.items()})

    env = MockEnv(
        state_space=InfBoxSpace(shape=states[0].shape, labels=["s0", "s1", "s2"]),
        obs_space=InfBoxSpace(shape=observations[0].shape, labels=["o0", "o1", "o2", "o3"]),
        act_space=InfBoxSpace(shape=actions[0].shape, labels=["a0", "a1"]),
    )

    reconstructed = StepSequence.from_pandas(df, env.spec)

    assert len(reconstructed.rewards) == len(rewards)
    assert np.allclose(reconstructed.states, states)
    assert np.allclose(reconstructed.observations, observations)
    assert np.allclose(reconstructed.actions, actions)
def test_reset():
    """
    Check that resetting a `DownsamplingWrapper` with `factor=2` clears its stored action and step counter.

    The expected values encode that the wrapped environment only receives a newly commanded action on every
    second step, and that this cycle starts over after `reset()`.
    """
    act_space = BoxSpace(-1, 1, shape=(2, ))
    obs_space = BoxSpace(-1, 1, shape=(2, ))
    mockenv = MockEnv(act_space=act_space, obs_space=obs_space)
    wenv = DownsamplingWrapper(mockenv, factor=2)

    # Perform some actions: (commanded action, action expected to arrive at the wrapped environment)
    before_reset = (
        ([0, 4], [0, 4]),
        ([4, 4], [0, 4]),
        ([4, 4], [4, 4]),
    )
    for commanded, forwarded in before_reset:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == forwarded

    # The next action would be [4, 4] again, but now we reset
    wenv.reset()
    assert wenv._act_last is None
    assert wenv._cnt == 0

    after_reset = (
        ([1, 2], [1, 2]),
        ([2, 3], [1, 2]),
    )
    for commanded, forwarded in after_reset:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == forwarded
def test_domain_param():
    """
    Check that the delay of an `ActDelayWrapper` can be changed through its `domain_param` attribute.

    The wrapper starts with `delay=1`; after setting `act_delay` to 2 and resetting, the expected values
    encode that a commanded action arrives at the wrapped environment two steps later.
    """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )))
    wenv = ActDelayWrapper(mockenv, delay=1)

    # Reset to initialize buffer
    wenv.reset()

    # Perform some actions: (commanded action, action expected to arrive at the wrapped environment)
    with_delay_one = (
        ([0, 1], [0, 0]),
        ([2, 4], [0, 1]),
    )
    for commanded, forwarded in with_delay_one:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == forwarded

    # change the delay and reset
    wenv.domain_param = {'act_delay': 2}
    wenv.reset()

    with_delay_two = (
        ([1, 2], [0, 0]),
        ([2, 3], [0, 0]),
        ([8, 9], [1, 2]),
    )
    for commanded, forwarded in with_delay_two:
        wenv.step(np.array(commanded))
        assert mockenv.last_act == forwarded
def test_indi_nonlin_layer(in_features, same_nonlin, bias, weight): if not same_nonlin and in_features > 1: nonlin = in_features*[to.tanh] else: nonlin = to.sigmoid layer = IndiNonlinLayer(in_features, nonlin, bias, weight) assert isinstance(layer, nn.Module) i = to.randn(in_features) o = layer(i) assert isinstance(o, to.Tensor) assert i.shape == o.shape @pytest.mark.parametrize( 'env', [MockEnv(obs_space=InfBoxSpace(shape=1), act_space=InfBoxSpace(shape=1))] ) @pytest.mark.parametrize( 'policy', [ # Two-headed policies are not supported 'rnn_policy', 'lstm_policy', 'gru_policy', 'adn_policy', 'nf_policy', ], ids=['rnn', 'lstm', 'gru', 'adn', 'nf'], indirect=True ) @pytest.mark.parametrize('windowed', [True, False], ids=['windowed', 'not_windowed'])