def test_episode(data_size, observation_size, action_size):
    """Check that ``Episode`` keeps its inputs and exposes linked transitions.

    Random float32 observations, actions and rewards with ``data_size`` rows
    are wrapped in an ``Episode``. The test then asserts that:

    * the stored arrays equal the inputs,
    * the episode reports ``data_size - 1`` transitions,
    * each transition ``i`` pairs step ``i`` with step ``i + 1`` and only the
      last transition is flagged terminal,
    * the ``next_transition`` / ``prev_transition`` links walk the whole
      episode in both directions,
    * the episode supports ``len``, indexing and iteration.

    The arguments are presumably supplied by a parametrize decorator that is
    not visible in this view.
    """
    observations = np.random.random((data_size, observation_size)).astype("f4")
    actions = np.random.random((data_size, action_size)).astype("f4")
    rewards = np.random.random(data_size).astype("f4")

    episode = Episode(
        observation_shape=(observation_size,),
        action_size=action_size,
        observations=observations,
        actions=actions,
        rewards=rewards,
    )

    # check Episode methods
    assert np.all(episode.observations == observations)
    assert np.all(episode.actions == actions)
    assert np.all(episode.rewards == rewards)
    assert episode.size() == data_size - 1
    assert episode.get_observation_shape() == (observation_size,)
    assert episode.get_action_size() == action_size
    # The rewards are float32, so compare the summed return with a tolerance
    # rather than exact equality (same policy as the per-step reward checks
    # below).
    assert np.allclose(episode.compute_return(), np.sum(rewards[1:]))

    # check transitions exported from episode
    assert len(episode.transitions) == data_size - 1
    for i, t in enumerate(episode.transitions):
        assert isinstance(t, Transition)
        assert t.get_observation_shape() == (observation_size,)
        assert t.get_action_size() == action_size
        assert np.all(t.observation == observations[i])
        assert np.all(t.action == actions[i])
        assert np.allclose(t.reward, rewards[i])
        assert np.all(t.next_observation == observations[i + 1])
        assert np.all(t.next_action == actions[i + 1])
        assert np.allclose(t.next_reward, rewards[i + 1])
        # only the final transition of the episode is terminal
        assert t.terminal == (1.0 if (i == data_size - 2) else 0.0)

    # check forward pointers: starting from the first transition, following
    # next_transition must visit every transition exactly once
    count = 1
    transition = episode[0]
    while transition.next_transition:
        transition = transition.next_transition
        count += 1
    assert count == data_size - 1

    # check backward pointers: same walk from the last transition backwards
    count = 1
    transition = episode[-1]
    while transition.prev_transition:
        transition = transition.prev_transition
        count += 1
    assert count == data_size - 1

    # check list-like behaviors
    assert len(episode) == data_size - 1
    assert episode[0] is episode.transitions[0]
    for i, transition in enumerate(episode):
        assert isinstance(transition, Transition)
        assert transition is episode.transitions[i]
def test_episode(data_size, observation_size, action_size, gamma):
    """Check ``Episode`` storage, exported transitions and discounted returns.

    Random observations, actions and ``(data_size, 1)`` rewards are wrapped
    in an ``Episode`` built with discount factor ``gamma``. The test then
    asserts that:

    * the stored arrays equal the inputs,
    * the episode reports ``data_size - 1`` transitions,
    * each transition ``i`` pairs step ``i`` with step ``i + 1`` and only the
      last transition is flagged terminal,
    * ``t.returns`` and ``t.consequent_observations`` have one entry per
      remaining step, and ``t.returns[j]`` equals the running discounted sum
      ``sum(gamma**k * rewards[i + 1 + k] for k in range(j + 1))``,
    * the episode supports ``len``, indexing and iteration.

    The arguments are presumably supplied by a parametrize decorator that is
    not visible in this view.
    """
    observations = np.random.random((data_size, observation_size))
    actions = np.random.random((data_size, action_size))
    rewards = np.random.random((data_size, 1))

    episode = Episode(
        (observation_size,),
        action_size,
        observations,
        actions,
        rewards,
        gamma,
    )

    # check Episode methods
    assert np.all(episode.observations == observations)
    assert np.all(episode.actions == actions)
    assert np.all(episode.rewards == rewards)
    assert episode.size() == data_size - 1
    assert episode.get_observation_shape() == (observation_size,)
    assert episode.get_action_size() == action_size
    # A float sum: compare with a tolerance so the test does not depend on
    # the summation order used by the implementation.
    assert np.allclose(episode.compute_return(), np.sum(rewards[1:]))

    # check transitions exported from episode
    assert len(episode.transitions) == data_size - 1
    for i, t in enumerate(episode.transitions):
        assert isinstance(t, Transition)
        assert t.observation_shape == (observation_size,)
        assert t.action_size == action_size
        assert np.all(t.observation == observations[i])
        assert np.all(t.action == actions[i])
        assert t.reward == rewards[i]
        assert np.all(t.next_observation == observations[i + 1])
        assert np.all(t.next_action == actions[i + 1])
        assert t.next_reward == rewards[i + 1]
        # only the final transition of the episode is terminal
        assert t.terminal == (1.0 if (i == data_size - 2) else 0.0)
        assert len(t.returns) == data_size - i - 1
        assert len(t.consequent_observations) == data_size - i - 1

        # check returns: entry j must be the discounted sum of the next
        # j + 1 rewards. The reference is accumulated incrementally, so it is
        # compared with a tolerance instead of bit-exact equality.
        ref_return = 0.0
        for j, ret in enumerate(t.returns):
            ref_return += (gamma**j) * rewards[i + 1 + j][0]
            assert np.allclose(ret, ref_return)

    # check list-like behaviors
    assert len(episode) == data_size - 1
    assert episode[0] is episode.transitions[0]
    for i, transition in enumerate(episode):
        assert isinstance(transition, Transition)
        assert transition is episode.transitions[i]