def test_collector():
    """Check what ``Collector`` writes into its replay buffer.

    Three collectors are exercised: one on a single ``MyTestEnv`` and two on
    a ``SubprocVectorEnv`` of four environments (sizes 2, 3, 4 and 5). After
    each ``collect`` call the stored ``obs`` / ``obs_next`` values are
    compared against hard-coded expectations.
    """
    env_fns = [
        lambda size=size: MyTestEnv(size=size, sleep=0)
        for size in (2, 3, 4, 5)
    ]
    subproc_envs = SubprocVectorEnv(env_fns)
    policy = MyPolicy()

    # --- single environment (first factory, size=2) ---
    single_env = env_fns[0]()
    c_single = Collector(
        policy, single_env, ReplayBuffer(size=100, ignore_obs_next=False)
    )
    c_single.collect(n_step=3)
    assert equal(c_single.buffer.obs[:3], [0, 1, 0])
    assert equal(c_single.buffer[:3].obs_next, [1, 2, 1])
    c_single.collect(n_episode=3)
    assert equal(c_single.buffer.obs[:8], [0, 1, 0, 1, 0, 1, 0, 1])
    assert equal(c_single.buffer[:8].obs_next, [1, 2, 1, 2, 1, 2, 1, 2])

    # --- vectorized environments, step-based then episode-based ---
    c_vector = Collector(
        policy, subproc_envs, ReplayBuffer(size=100, ignore_obs_next=False)
    )
    c_vector.collect(n_step=6)
    assert equal(
        c_vector.buffer.obs[:11],
        [0, 1, 0, 1, 2, 0, 1, 0, 1, 2, 3],
    )
    assert equal(
        c_vector.buffer[:11].obs_next,
        [1, 2, 1, 2, 3, 1, 2, 1, 2, 3, 4],
    )
    c_vector.collect(n_episode=2)
    assert equal(
        c_vector.buffer.obs[11:21],
        [0, 1, 2, 3, 4, 0, 1, 0, 1, 2],
    )
    assert equal(
        c_vector.buffer[11:21].obs_next,
        [1, 2, 3, 4, 5, 1, 2, 1, 2, 3],
    )

    # --- vectorized environments with a per-environment episode quota ---
    c_quota = Collector(
        policy, subproc_envs, ReplayBuffer(size=100, ignore_obs_next=False)
    )
    c_quota.collect(n_episode=[1, 2, 2, 2])
    assert equal(
        c_quota.buffer.obs_next[:26],
        [
            1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5,
            1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5,
        ],
    )
    # Reset the environments before the second quota run; the buffer is kept,
    # so the new transitions are checked at offsets 26..53.
    c_quota.reset_env()
    c_quota.collect(n_episode=[2, 2, 2, 2])
    assert equal(
        c_quota.buffer.obs_next[26:54],
        [
            1, 2, 1, 2, 3, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5,
            1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5,
        ],
    )
def test_episode(
    policy: BasePolicy,
    collector: Collector,
    test_fn: Optional[Callable[[int, Optional[int]], None]],
    epoch: int,
    n_episode: Union[int, List[int]],
    writer: Optional[SummaryWriter] = None,
    global_step: Optional[int] = None,
) -> Dict[str, float]:
    """Run an evaluation pass of ``policy`` through ``collector``.

    The collector's environment and buffer are reset, the policy is put into
    eval mode, ``test_fn`` (if truthy) is invoked, and ``n_episode`` episodes
    are collected.

    :param policy: policy to evaluate; only ``policy.eval()`` is called here.
    :param collector: collector used to gather the test episodes.
    :param test_fn: optional hook called as ``test_fn(epoch, global_step)``
        before collecting.
    :param epoch: value forwarded to ``test_fn``.
    :param n_episode: number of episodes. An ``int`` is split across the
        environments when the collector reports more than one; anything else
        is passed to ``collector.collect`` unchanged.
    :param writer: optional summary writer; used only when ``global_step`` is
        also given.
    :param global_step: step forwarded to ``test_fn`` and to the writer.
    :return: whatever ``collector.collect`` returned.
    """
    collector.reset_env()
    collector.reset_buffer()
    policy.eval()
    if test_fn:
        test_fn(epoch, global_step)

    if collector.get_env_num() > 1 and isinstance(n_episode, int):
        # Spread the episodes as evenly as possible: every environment gets
        # the quotient and the first ``remainder`` environments get one more.
        # The resulting list holds NumPy floats, not Python ints.
        env_count = collector.get_env_num()
        quotient, remainder = divmod(n_episode, env_count)
        per_env = np.zeros(env_count) + quotient
        per_env[:remainder] += 1
        n_episode = list(per_env)

    result = collector.collect(n_episode=n_episode)

    # Logging needs both a writer and a step to attach the scalars to.
    if writer is None or global_step is None:
        return result
    for key, value in result.items():
        writer.add_scalar("test/" + key, value, global_step=global_step)
    return result
def test_collector():
    """Check ``Collector`` buffer contents for single and vectorized envs.

    Covers a single ``MyTestEnv`` with a ``ReplayBuffer``, then a
    ``SubprocVectorEnv`` and a ``DummyVectorEnv`` (four envs each) with a
    ``VectorReplayBuffer``, and finally the constructor / ``collect`` calls
    that are expected to raise ``TypeError``.
    """
    logger = Logger(SummaryWriter('log/collector'))
    env_fns = [
        lambda size=i: MyTestEnv(size=size, sleep=0) for i in (2, 3, 4, 5)
    ]
    subproc_envs = SubprocVectorEnv(env_fns)
    dummy_envs = DummyVectorEnv(env_fns)
    policy = MyPolicy()

    # --- single environment ---
    single_env = env_fns[0]()
    c_single = Collector(
        policy, single_env, ReplayBuffer(size=100), logger.preprocess_fn
    )
    c_single.collect(n_step=3)
    assert len(c_single.buffer) == 3
    # Slot 3 has not been written yet and is expected to still be zero.
    assert np.allclose(c_single.buffer.obs[:4, 0], [0, 1, 0, 0])
    assert np.allclose(c_single.buffer[:].obs_next[..., 0], [1, 2, 1])
    c_single.collect(n_episode=3)
    assert len(c_single.buffer) == 8
    assert np.allclose(
        c_single.buffer.obs[:10, 0], [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
    )
    assert np.allclose(
        c_single.buffer[:].obs_next[..., 0], [1, 2, 1, 2, 1, 2, 1, 2]
    )
    c_single.collect(n_step=3, random=True)

    # --- subprocess vector env, one 25-slot sub-buffer per environment ---
    c_subproc = Collector(
        policy,
        subproc_envs,
        VectorReplayBuffer(total_size=100, buffer_num=4),
        logger.preprocess_fn,
    )
    c_subproc.collect(n_step=8)
    # ``expected_obs`` mirrors the full 100-slot buffer and is updated
    # incrementally as more data is collected.
    expected_obs = np.zeros(100)
    expected_obs[[0, 1, 25, 26, 50, 51, 75, 76]] = [0, 1, 0, 1, 0, 1, 0, 1]
    assert np.allclose(c_subproc.buffer.obs[:, 0], expected_obs)
    assert np.allclose(
        c_subproc.buffer[:].obs_next[..., 0], [1, 2, 1, 2, 1, 2, 1, 2]
    )
    c_subproc.collect(n_episode=4)
    assert len(c_subproc.buffer) == 16
    expected_obs[[2, 3, 27, 52, 53, 77, 78, 79]] = [0, 1, 2, 2, 3, 2, 3, 4]
    assert np.allclose(c_subproc.buffer.obs[:, 0], expected_obs)
    assert np.allclose(
        c_subproc.buffer[:].obs_next[..., 0],
        [1, 2, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5],
    )
    c_subproc.collect(n_episode=4, random=True)

    # --- dummy vector env ---
    c_dummy = Collector(
        policy,
        dummy_envs,
        VectorReplayBuffer(total_size=100, buffer_num=4),
        logger.preprocess_fn,
    )
    c_dummy.collect(n_episode=7)
    # Two buffer layouts are accepted after seven episodes.
    layout_a = expected_obs.copy()
    layout_a[[4, 5, 28, 29, 30]] = [0, 1, 0, 1, 2]
    layout_b = expected_obs.copy()
    layout_b[[28, 29, 30, 54, 55, 56, 57]] = [0, 1, 2, 0, 1, 2, 3]
    actual_obs = c_dummy.buffer.obs[:, 0]
    assert np.all(actual_obs == layout_a) or np.all(actual_obs == layout_b)
    c_dummy.reset_env()
    c_dummy.reset_buffer()
    assert c_dummy.collect(n_episode=8)['n/ep'] == 8
    expected_obs[[4, 5, 28, 29, 30, 54, 55, 56, 57]] = [
        0, 1, 0, 1, 2, 0, 1, 2, 3,
    ]
    assert np.all(c_dummy.buffer.obs[:, 0] == expected_obs)
    c_dummy.collect(n_episode=4, random=True)

    # test corner case
    # A vector env paired with a non-vector buffer must be rejected.
    with pytest.raises(TypeError):
        Collector(policy, dummy_envs, ReplayBuffer(10))
    with pytest.raises(TypeError):
        Collector(policy, dummy_envs, PrioritizedReplayBuffer(10, 0.5, 0.5))
    # ``collect`` without ``n_step`` or ``n_episode`` must be rejected too.
    with pytest.raises(TypeError):
        c_dummy.collect()
def test_episode(
    policy: BasePolicy,
    collector: Collector,
    test_fn: Callable[[int], None],
    epoch: int,
    n_episode: Union[int, List[int]],
) -> Dict[str, float]:
    """Evaluate ``policy`` by collecting ``n_episode`` episodes.

    Resets the collector's environment and buffer, switches the policy to
    eval mode, calls ``test_fn(epoch)`` when ``test_fn`` is truthy, and
    returns the result of ``collector.collect``.

    :param policy: policy to evaluate; only ``policy.eval()`` is called here.
    :param collector: collector used to gather the test episodes.
    :param test_fn: hook invoked with ``epoch`` before collecting (skipped if
        falsy).
    :param epoch: value forwarded to ``test_fn``.
    :param n_episode: number of episodes. A scalar is split across the
        environments when the collector reports more than one.
    :return: whatever ``collector.collect`` returned.
    """
    collector.reset_env()
    collector.reset_buffer()
    policy.eval()
    if test_fn:
        test_fn(epoch)

    needs_split = collector.get_env_num() > 1 and np.isscalar(n_episode)
    if needs_split:
        # Every environment gets the floor share; the first
        # ``n_episode % env_count`` environments get one extra episode.
        env_count = collector.get_env_num()
        share = np.zeros(env_count) + n_episode // env_count
        share[:n_episode % env_count] += 1
        n_episode = list(share)

    return collector.collect(n_episode=n_episode)
def test_collector():
    """Check ``Collector`` buffer contents with a shared ``ReplayBuffer``.

    A single ``MyTestEnv``, a ``SubprocVectorEnv`` and a ``DummyVectorEnv``
    (four envs of sizes 2..5) are collected from in turn, and the stored
    ``obs`` / ``obs_next`` values are compared with column-shaped
    expectations.
    """

    def _column(values):
        # Expected values are compared with a trailing axis of length one.
        return np.expand_dims(values, axis=-1)

    logger = Logger(SummaryWriter('log/collector'))
    env_fns = [
        lambda size=i: MyTestEnv(size=size, sleep=0) for i in (2, 3, 4, 5)
    ]
    subproc_envs = SubprocVectorEnv(env_fns)
    dummy_envs = DummyVectorEnv(env_fns)
    policy = MyPolicy()

    # --- single environment ---
    single_env = env_fns[0]()
    c_single = Collector(
        policy,
        single_env,
        ReplayBuffer(size=100, ignore_obs_next=False),
        logger.preprocess_fn,
    )
    c_single.collect(n_step=3)
    assert np.allclose(c_single.buffer.obs[:4], _column([0, 1, 0, 1]))
    assert np.allclose(c_single.buffer[:4].obs_next, _column([1, 2, 1, 2]))
    c_single.collect(n_episode=3)
    assert np.allclose(
        c_single.buffer.obs[:10],
        _column([0, 1, 0, 1, 0, 1, 0, 1, 0, 1]),
    )
    assert np.allclose(
        c_single.buffer[:10].obs_next,
        _column([1, 2, 1, 2, 1, 2, 1, 2, 1, 2]),
    )
    c_single.collect(n_step=3, random=True)

    # --- subprocess vector env ---
    c_subproc = Collector(
        policy,
        subproc_envs,
        ReplayBuffer(size=100, ignore_obs_next=False),
        logger.preprocess_fn,
    )
    c_subproc.collect(n_step=6)
    assert np.allclose(
        c_subproc.buffer.obs[:11],
        _column([0, 1, 0, 1, 2, 0, 1, 0, 1, 2, 3]),
    )
    assert np.allclose(
        c_subproc.buffer[:11].obs_next,
        _column([1, 2, 1, 2, 3, 1, 2, 1, 2, 3, 4]),
    )
    c_subproc.collect(n_episode=2)
    assert np.allclose(
        c_subproc.buffer.obs[11:21],
        _column([0, 1, 2, 3, 4, 0, 1, 0, 1, 2]),
    )
    assert np.allclose(
        c_subproc.buffer[11:21].obs_next,
        _column([1, 2, 3, 4, 5, 1, 2, 1, 2, 3]),
    )
    c_subproc.collect(n_episode=3, random=True)

    # --- dummy vector env with a per-environment episode quota ---
    c_dummy = Collector(
        policy,
        dummy_envs,
        ReplayBuffer(size=100, ignore_obs_next=False),
        logger.preprocess_fn,
    )
    c_dummy.collect(n_episode=[1, 2, 2, 2])
    assert np.allclose(
        c_dummy.buffer.obs_next[:26],
        _column([
            1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5,
            1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5,
        ]),
    )
    # Only the environments are reset; the buffer keeps growing, so the next
    # batch is checked at offsets 26..53.
    c_dummy.reset_env()
    c_dummy.collect(n_episode=[2, 2, 2, 2])
    assert np.allclose(
        c_dummy.buffer.obs_next[26:54],
        _column([
            1, 2, 1, 2, 3, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5,
            1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5,
        ]),
    )
    c_dummy.collect(n_episode=[1, 1, 1, 1], random=True)
def test_episode(
    policy: BasePolicy,
    collector: Collector,
    test_fn: Optional[Callable[[int, Optional[int]], None]],
    epoch: int,
    n_episode: int,
    logger: Optional[BaseLogger] = None,
    global_step: Optional[int] = None,
    reward_metric: Optional[Callable[[np.ndarray], np.ndarray]] = None,
) -> Dict[str, Any]:
    """Evaluate ``policy`` through ``collector`` and optionally log the result.

    The collector's environment and buffer are reset and the policy is put
    into eval mode before ``n_episode`` episodes are collected.

    :param policy: policy to evaluate; only ``policy.eval()`` is called here.
    :param collector: collector used to gather the test episodes.
    :param test_fn: optional hook called as ``test_fn(epoch, global_step)``
        before collecting.
    :param epoch: value forwarded to ``test_fn``.
    :param n_episode: number of episodes passed to ``collector.collect``.
    :param logger: optional logger; ``log_test_data`` is called on it only
        when ``global_step`` is also given.
    :param global_step: step forwarded to ``test_fn`` and to the logger.
    :param reward_metric: optional transform applied to ``result["rews"]``;
        its output replaces that entry.
    :return: the (possibly updated) dict returned by ``collector.collect``.
    """
    collector.reset_env()
    collector.reset_buffer()
    policy.eval()
    if test_fn:
        test_fn(epoch, global_step)

    result = collector.collect(n_episode=n_episode)

    if reward_metric:
        # Only the "rews" entry is rewritten; other entries are left as
        # produced by the collector.
        raw_rewards = result["rews"]
        result["rews"] = reward_metric(raw_rewards)

    # Logging needs both a logger and a step to attach the data to.
    if not logger or global_step is None:
        return result
    logger.log_test_data(result, global_step)
    return result
def test_collector():
    """Check ``Collector`` buffer contents when a ``preprocess_fn`` is given.

    One collector runs on a single ``MyTestEnv`` (with ``logger.log`` passed
    as ``log_fn``) and two run on a ``SubprocVectorEnv`` of four environments
    (sizes 2, 3, 4 and 5). After each ``collect`` call the stored ``obs`` /
    ``obs_next`` values are compared against hard-coded expectations.
    """
    logger = Logger(SummaryWriter('log/collector'))
    env_fns = [
        lambda size=i: MyTestEnv(size=size, sleep=0) for i in (2, 3, 4, 5)
    ]
    subproc_envs = SubprocVectorEnv(env_fns)
    policy = MyPolicy()

    # --- single environment, with logging hook ---
    single_env = env_fns[0]()
    c_single = Collector(
        policy,
        single_env,
        ReplayBuffer(size=100, ignore_obs_next=False),
        preprocess_fn,
    )
    c_single.collect(n_step=3, log_fn=logger.log)
    assert np.allclose(c_single.buffer.obs[:3], [0, 1, 0])
    assert np.allclose(c_single.buffer[:3].obs_next, [1, 2, 1])
    c_single.collect(n_episode=3, log_fn=logger.log)
    assert np.allclose(c_single.buffer.obs[:8], [0, 1, 0, 1, 0, 1, 0, 1])
    assert np.allclose(
        c_single.buffer[:8].obs_next, [1, 2, 1, 2, 1, 2, 1, 2]
    )

    # --- vectorized environments, step-based then episode-based ---
    c_vector = Collector(
        policy,
        subproc_envs,
        ReplayBuffer(size=100, ignore_obs_next=False),
        preprocess_fn,
    )
    c_vector.collect(n_step=6)
    assert np.allclose(
        c_vector.buffer.obs[:11],
        [0, 1, 0, 1, 2, 0, 1, 0, 1, 2, 3],
    )
    assert np.allclose(
        c_vector.buffer[:11].obs_next,
        [1, 2, 1, 2, 3, 1, 2, 1, 2, 3, 4],
    )
    c_vector.collect(n_episode=2)
    assert np.allclose(
        c_vector.buffer.obs[11:21],
        [0, 1, 2, 3, 4, 0, 1, 0, 1, 2],
    )
    assert np.allclose(
        c_vector.buffer[11:21].obs_next,
        [1, 2, 3, 4, 5, 1, 2, 1, 2, 3],
    )

    # --- vectorized environments with a per-environment episode quota ---
    c_quota = Collector(
        policy,
        subproc_envs,
        ReplayBuffer(size=100, ignore_obs_next=False),
        preprocess_fn,
    )
    c_quota.collect(n_episode=[1, 2, 2, 2])
    assert np.allclose(
        c_quota.buffer.obs_next[:26],
        [
            1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5,
            1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5,
        ],
    )
    # Only the environments are reset; the buffer keeps growing, so the next
    # batch is checked at offsets 26..53.
    c_quota.reset_env()
    c_quota.collect(n_episode=[2, 2, 2, 2])
    assert np.allclose(
        c_quota.buffer.obs_next[26:54],
        [
            1, 2, 1, 2, 3, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5,
            1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5,
        ],
    )
def test_collector(gym_reset_kwargs):
    """Check ``Collector`` buffer contents, including the stored ``info``.

    Covers a single ``MyTestEnv`` with a ``ReplayBuffer``, a
    ``SubprocVectorEnv`` and a ``DummyVectorEnv`` (four envs each) with a
    ``VectorReplayBuffer``, the calls expected to raise ``TypeError``, and
    ``NXEnv`` observations stored with ``object`` dtype.

    :param gym_reset_kwargs: forwarded unchanged to every ``collect`` /
        ``reset_env`` call that accepts it.
    """
    logger = Logger(SummaryWriter('log/collector'))
    env_fns = [
        lambda size=i: MyTestEnv(size=size, sleep=0) for i in (2, 3, 4, 5)
    ]
    subproc_envs = SubprocVectorEnv(env_fns)
    dummy_envs = DummyVectorEnv(env_fns)
    policy = MyPolicy()

    # --- single environment ---
    single_env = env_fns[0]()
    c_single = Collector(
        policy,
        single_env,
        ReplayBuffer(size=100),
        logger.preprocess_fn,
    )
    c_single.collect(n_step=3, gym_reset_kwargs=gym_reset_kwargs)
    assert len(c_single.buffer) == 3
    # Slot 3 has not been written yet and is expected to still be zero.
    assert np.allclose(c_single.buffer.obs[:4, 0], [0, 1, 0, 0])
    assert np.allclose(c_single.buffer[:].obs_next[..., 0], [1, 2, 1])
    expected_keys = np.zeros(100)
    expected_keys[:3] = 1
    assert np.allclose(c_single.buffer.info["key"], expected_keys)
    assert all(
        isinstance(stored_env, MyTestEnv)
        for stored_env in c_single.buffer.info["env"][:3]
    )
    assert np.allclose(c_single.buffer.info["env_id"], 0)
    expected_rews = np.zeros(100)
    expected_rews[:3] = [0, 1, 0]
    assert np.allclose(c_single.buffer.info["rew"], expected_rews)

    c_single.collect(n_episode=3, gym_reset_kwargs=gym_reset_kwargs)
    assert len(c_single.buffer) == 8
    assert np.allclose(
        c_single.buffer.obs[:10, 0], [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
    )
    assert np.allclose(
        c_single.buffer[:].obs_next[..., 0], [1, 2, 1, 2, 1, 2, 1, 2]
    )
    assert np.allclose(c_single.buffer.info["key"][:8], 1)
    assert all(
        isinstance(stored_env, MyTestEnv)
        for stored_env in c_single.buffer.info["env"][:8]
    )
    assert np.allclose(c_single.buffer.info["env_id"][:8], 0)
    assert np.allclose(
        c_single.buffer.info["rew"][:8], [0, 1, 0, 1, 0, 1, 0, 1]
    )
    c_single.collect(
        n_step=3, random=True, gym_reset_kwargs=gym_reset_kwargs
    )

    # --- subprocess vector env, one 25-slot sub-buffer per environment ---
    c_subproc = Collector(
        policy,
        subproc_envs,
        VectorReplayBuffer(total_size=100, buffer_num=4),
        logger.preprocess_fn,
    )
    c_subproc.collect(n_step=8, gym_reset_kwargs=gym_reset_kwargs)
    # The ``expected_*`` arrays mirror the full 100-slot buffer and are
    # updated incrementally for the rest of the test.
    expected_obs = np.zeros(100)
    valid_indices = [0, 1, 25, 26, 50, 51, 75, 76]
    expected_obs[valid_indices] = [0, 1, 0, 1, 0, 1, 0, 1]
    assert np.allclose(c_subproc.buffer.obs[:, 0], expected_obs)
    assert np.allclose(
        c_subproc.buffer[:].obs_next[..., 0], [1, 2, 1, 2, 1, 2, 1, 2]
    )
    expected_keys = np.zeros(100)
    expected_keys[valid_indices] = 1
    assert np.allclose(c_subproc.buffer.info["key"], expected_keys)
    assert all(
        isinstance(stored_env, MyTestEnv)
        for stored_env in c_subproc.buffer.info["env"][valid_indices]
    )
    expected_env_ids = np.zeros(100)
    expected_env_ids[valid_indices] = [0, 0, 1, 1, 2, 2, 3, 3]
    assert np.allclose(c_subproc.buffer.info["env_id"], expected_env_ids)
    expected_rews = np.zeros(100)
    expected_rews[valid_indices] = [0, 1, 0, 0, 0, 0, 0, 0]
    assert np.allclose(c_subproc.buffer.info["rew"], expected_rews)

    c_subproc.collect(n_episode=4, gym_reset_kwargs=gym_reset_kwargs)
    assert len(c_subproc.buffer) == 16
    valid_indices = [2, 3, 27, 52, 53, 77, 78, 79]
    expected_obs[valid_indices] = [0, 1, 2, 2, 3, 2, 3, 4]
    assert np.allclose(c_subproc.buffer.obs[:, 0], expected_obs)
    assert np.allclose(
        c_subproc.buffer[:].obs_next[..., 0],
        [1, 2, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5],
    )
    expected_keys[valid_indices] = 1
    assert np.allclose(c_subproc.buffer.info["key"], expected_keys)
    assert all(
        isinstance(stored_env, MyTestEnv)
        for stored_env in c_subproc.buffer.info["env"][valid_indices]
    )
    expected_env_ids[valid_indices] = [0, 0, 1, 2, 2, 3, 3, 3]
    assert np.allclose(c_subproc.buffer.info["env_id"], expected_env_ids)
    expected_rews[valid_indices] = [0, 1, 1, 0, 1, 0, 0, 1]
    assert np.allclose(c_subproc.buffer.info["rew"], expected_rews)
    c_subproc.collect(
        n_episode=4, random=True, gym_reset_kwargs=gym_reset_kwargs
    )

    # --- dummy vector env ---
    c_dummy = Collector(
        policy,
        dummy_envs,
        VectorReplayBuffer(total_size=100, buffer_num=4),
        logger.preprocess_fn,
    )
    c_dummy.collect(n_episode=7, gym_reset_kwargs=gym_reset_kwargs)
    # Two buffer layouts are accepted after seven episodes.
    layout_a = expected_obs.copy()
    layout_a[[4, 5, 28, 29, 30]] = [0, 1, 0, 1, 2]
    layout_b = expected_obs.copy()
    layout_b[[28, 29, 30, 54, 55, 56, 57]] = [0, 1, 2, 0, 1, 2, 3]
    actual_obs = c_dummy.buffer.obs[:, 0]
    assert np.all(actual_obs == layout_a) or np.all(actual_obs == layout_b)

    c_dummy.reset_env(gym_reset_kwargs=gym_reset_kwargs)
    c_dummy.reset_buffer()
    episode_stats = c_dummy.collect(
        n_episode=8, gym_reset_kwargs=gym_reset_kwargs
    )
    assert episode_stats['n/ep'] == 8
    valid_indices = [4, 5, 28, 29, 30, 54, 55, 56, 57]
    expected_obs[valid_indices] = [0, 1, 0, 1, 2, 0, 1, 2, 3]
    assert np.all(c_dummy.buffer.obs[:, 0] == expected_obs)
    expected_keys[valid_indices] = 1
    assert np.allclose(c_dummy.buffer.info["key"], expected_keys)
    assert all(
        isinstance(stored_env, MyTestEnv)
        for stored_env in c_dummy.buffer.info["env"][valid_indices]
    )
    expected_env_ids[valid_indices] = [0, 0, 1, 1, 1, 2, 2, 2, 2]
    assert np.allclose(c_dummy.buffer.info["env_id"], expected_env_ids)
    expected_rews[valid_indices] = [0, 1, 0, 0, 1, 0, 0, 0, 1]
    assert np.allclose(c_dummy.buffer.info["rew"], expected_rews)
    c_dummy.collect(
        n_episode=4, random=True, gym_reset_kwargs=gym_reset_kwargs
    )

    # test corner case
    # A vector env paired with a non-vector buffer must be rejected.
    with pytest.raises(TypeError):
        Collector(policy, dummy_envs, ReplayBuffer(10))
    with pytest.raises(TypeError):
        Collector(policy, dummy_envs, PrioritizedReplayBuffer(10, 0.5, 0.5))
    # ``collect`` without ``n_step`` or ``n_episode`` must be rejected too.
    with pytest.raises(TypeError):
        c_dummy.collect()

    # test NXEnv
    # Both observation flavours are expected to land in an object-dtype array.
    for obs_type in ("array", "object"):
        nx_envs = SubprocVectorEnv(
            [lambda i=x, t=obs_type: NXEnv(i, t) for x in [5, 10, 15, 20]]
        )
        c_nx = Collector(
            policy,
            nx_envs,
            VectorReplayBuffer(total_size=100, buffer_num=4),
        )
        c_nx.collect(n_step=6, gym_reset_kwargs=gym_reset_kwargs)
        assert c_nx.buffer.obs.dtype == object