def test_PER(self):
    """HER with prioritized=True: sample() exposes indexes and priorities are updatable.

    Fix: ``np.float`` was deprecated in NumPy 1.20 and removed in 1.24;
    use the explicit ``np.float64`` dtype instead.
    """
    rew_func = lambda s, a, g: -1 * (s != g)
    batch_size = 4
    hrb = HindsightReplayBuffer(size=10,
                                env_dict={"obs": {}, "act": {}, "next_obs": {}},
                                max_episode_len=2,
                                strategy="future",
                                reward_func=rew_func,
                                additional_goals=2,
                                prioritized=True)

    hrb.add(obs=0, act=0, next_obs=1)
    hrb.add(obs=1, act=0, next_obs=2)
    hrb.on_episode_end(3)

    # 2 transitions, each stored with 3 goals (1 real + 2 additional) = 6
    self.assertEqual(hrb.get_stored_size(), 6)

    sample = hrb.sample(batch_size)
    hrb.update_priorities(indexes=sample["indexes"],
                          priorities=np.zeros_like(sample["indexes"],
                                                   dtype=np.float64))
def test_goal_final(self):
    """strategy="final": transitions are stored against both the desired goal
    and the goal extracted (via goal_func) from the episode's final state."""
    reward = lambda s, a, g: -1 * (s[:, :3] != g).any(axis=1)
    to_goal = lambda s: s[:, :3]

    hrb = HindsightReplayBuffer(
        10,
        {"obs": {"shape": 5}, "act": {}, "next_obs": {"shape": 5}},
        max_episode_len=10,
        reward_func=reward,
        goal_func=to_goal,
        goal_shape=3,
        strategy="final")

    hrb.add(obs=(0, 0, 0, 0, 0), act=0, next_obs=(1, 1, 1, 1, 1))
    hrb.add(obs=(1, 1, 1, 1, 1), act=0, next_obs=(2, 2, 2, 2, 2))
    # Nothing is committed to the main buffer until the episode ends.
    self.assertEqual(hrb.get_stored_size(), 0)

    hrb.on_episode_end((3, 3, 3))
    self.assertEqual(hrb.get_stored_size(), 4)

    everything = hrb.get_all_transitions()
    self.assertIn("goal", everything)
    self.assertEqual(everything["goal"].shape, (4, 3))
    np.testing.assert_allclose(
        everything["goal"],
        [[3, 3, 3], [3, 3, 3], [2, 2, 2], [2, 2, 2]])
def test_get_buffer_size(self):
    """get_buffer_size() reports the capacity passed at construction."""
    capacity = 10
    hrb = HindsightReplayBuffer(
        size=capacity,
        env_dict={"obs": {}, "act": {}, "next_obs": {}},
        max_episode_len=2,
        reward_func=lambda s, a, g: -1 * (s != g),
        additional_goals=1,
        prioritized=False)
    self.assertEqual(hrb.get_buffer_size(), capacity)
def test_episode(self):
    """strategy="episode": stored rewards agree with reward_func recomputed
    from the stored (next_obs, act, goal) triples."""
    reward = lambda s, a, g: -1 * (s != g)
    hrb = HindsightReplayBuffer(
        size=10,
        env_dict={"obs": {}, "act": {}, "next_obs": {}},
        max_episode_len=2,
        strategy="episode",
        reward_func=reward,
        additional_goals=2,
        prioritized=False)

    for o in (0, 1):
        hrb.add(obs=o, act=0, next_obs=o + 1)
    hrb.on_episode_end(3)

    # 2 transitions, each stored with 3 goals (1 real + 2 additional) = 6
    self.assertEqual(hrb.get_stored_size(), 6)

    data = hrb.get_all_transitions()
    self.assertIn("rew", data)
    self.assertIn("goal", data)
    self.assertEqual(data["obs"].shape, (6, 1))
    np.testing.assert_allclose(
        data["rew"],
        reward(data["next_obs"], data["act"], data["goal"]))
def test_unknown_strategy(self):
    """Constructing with an unrecognized strategy must raise ValueError.

    Fix: dropped the unused ``hrb =`` binding (lint F841) — construction
    raises, so the name was never assigned anyway.
    """
    rew_func = lambda s, a, g: -1 * (s != g)
    with self.assertRaises(ValueError):
        HindsightReplayBuffer(size=10,
                              env_dict={"obs": {}, "act": {}, "next_obs": {}},
                              max_episode_len=2,
                              strategy="__UNKNOWN_STRATEGY__",
                              reward_func=rew_func,
                              additional_goals=2,
                              prioritized=False)
def test_assert_PER(self):
    """PER-only APIs must raise ValueError when the buffer was built with
    prioritized=False."""
    hrb = HindsightReplayBuffer(
        size=10,
        env_dict={"obs": {}, "act": {}, "next_obs": {}},
        max_episode_len=2,
        strategy="future",
        reward_func=lambda s, a, g: -1 * (s != g),
        additional_goals=2,
        prioritized=False)

    hrb.add(obs=0, act=0, next_obs=1)
    hrb.add(obs=1, act=0, next_obs=2)

    with self.assertRaises(ValueError):
        hrb.get_max_priority()
    with self.assertRaises(ValueError):
        hrb.update_priorities([], [])
"act":{"shape": 1,"dtype": np.ubyte}, "next_obs": {"shape": env.observation_space.shape}} discount = tf.constant(gamma) # Prioritized Experience Replay: https://arxiv.org/abs/1511.05952 # See https://ymd_h.gitlab.io/cpprb/features/per/ prioritized = True # Hindsigh Experience Replay : https://arxiv.org/abs/1707.01495 # See https://ymd_h.gitlab.io/cpprb/features/her/ rb = HindsightReplayBuffer(buffer_size, env_dict, max_episode_len = max_episode_len, rewward_func = lambda s,a,g: -1*((s!=g).any(axis=1)), prioritized = prioritized) if prioritized: # Beta linear annealing beta = 0.4 beta_step = (1 - beta)/N_iteration def sg(state, goal): state = state.reshape((state.shape[0], -1)) goal = goal.reshape((goal.shape[0], -1)) return tf.constant(np.concatenate((state, goal), axis=1), dtype=tf.float32) @tf.function def Q_func(model,obs,act,act_shape):
def test_stored_size(self):
    """get_stored_size() grows only on on_episode_end() and resets on clear()."""
    hrb = HindsightReplayBuffer(
        size=10,
        env_dict={"obs": {}, "act": {}, "next_obs": {}},
        max_episode_len=2,
        reward_func=lambda s, a, g: -1 * (s != g),
        additional_goals=1,
        prioritized=False)

    # A freshly built buffer holds nothing.
    self.assertEqual(hrb.get_stored_size(), 0)
    self.assertEqual(hrb.additional_goals, 1)

    # Mid-episode transitions are cached, not yet committed to the buffer.
    hrb.add(obs=0, act=0, next_obs=0)
    self.assertEqual(hrb.get_stored_size(), 0)

    # Ending the episode commits the cached transitions (here: 2 entries,
    # the single transition stored with the real goal and 1 additional goal).
    hrb.on_episode_end(1)
    self.assertEqual(hrb.get_stored_size(), 2)

    # Ending again with no new transitions is a no-op.
    hrb.on_episode_end(1)
    self.assertEqual(hrb.get_stored_size(), 2)

    # clear() empties the buffer entirely.
    hrb.clear()
    self.assertEqual(hrb.get_stored_size(), 0)