def __init__(self, policy, env, n_envs, replay_pool_size, max_path_length, sampling_method,
             save_rollouts=False, save_rollouts_observations=True, save_env_infos=False,
             env_str=None, replay_pool_params=None):
    self._policy = policy
    self._n_envs = n_envs
    assert (self._n_envs == 1)  # b/c policy reset currently assumes a single environment
    # avoid a shared mutable default argument
    if replay_pool_params is None:
        replay_pool_params = {}
    # one replay pool per environment, each holding an equal share of the total capacity
    self._replay_pools = [
        RNNCriticReplayPool(env.spec,
                            env.horizon,
                            policy.N,
                            policy.gamma,
                            replay_pool_size // n_envs,
                            obs_history_len=policy.obs_history_len,
                            sampling_method=sampling_method,
                            save_rollouts=save_rollouts,
                            save_rollouts_observations=save_rollouts_observations,
                            save_env_infos=save_env_infos,
                            replay_pool_params=replay_pool_params)
        for _ in range(n_envs)
    ]
    # duplicate the environment by round-tripping through pickle; fall back to
    # constructing fresh copies from env_str if the environment is not picklable
    try:
        envs = [pickle.loads(pickle.dumps(env)) for _ in range(self._n_envs)] if self._n_envs > 1 else [env]
    except Exception:
        envs = [create_env(env_str) for _ in range(self._n_envs)] if self._n_envs > 1 else [env]
    ### need to seed each environment if it is GymEnv
    seed = get_seed()
    if seed is not None and isinstance(utils.inner_env(env), GymEnv):
        for i, env_i in enumerate(envs):  # env_i: do not shadow the env argument
            utils.inner_env(env_i).env.seed(seed + i)
    self._vec_env = VecEnvExecutor(envs=envs, max_path_length=max_path_length)
    self._curr_observations = self._vec_env.reset()
def statistics(self):
    return RNNCriticReplayPool.statistics_pools(self._replay_pools)
def get_recent_paths(self):
    return RNNCriticReplayPool.get_recent_paths_pools(self._replay_pools)
def log(self, prefix=''):
    RNNCriticReplayPool.log_pools(self._replay_pools, prefix=prefix)
def sample(self, batch_size):
    return RNNCriticReplayPool.sample_pools(self._replay_pools, batch_size,
                                            only_completed_episodes=self._policy.only_completed_episodes)
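# ---------------------------------------------------------------------------
# Usage sketch (illustrative only). The class name `Sampler` and the `policy`
# / `env` objects below are assumptions, not part of this excerpt: `policy` is
# assumed to expose N, gamma, obs_history_len, and only_completed_episodes,
# and `env` to expose spec and horizon, as required by RNNCriticReplayPool.
#
#     sampler = Sampler(policy=policy,
#                       env=env,
#                       n_envs=1,                  # the assert above requires 1
#                       replay_pool_size=int(1e6),
#                       max_path_length=1000,
#                       sampling_method='uniform') # assumed valid method name
#     batch = sampler.sample(batch_size=32)        # draws from the per-env pools
#     sampler.log(prefix='train/')                 # logs pool statistics
# ---------------------------------------------------------------------------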