def add(
    self, transition: Tuple[np.ndarray, np.ndarray, float, np.ndarray, bool]
) -> Tuple[Any, ...]:
    """Store one transition and emit an n-step transition when ready.

    Storage arrays are lazily allocated from the first transition's
    shapes. Returns the oldest single-step transition (for a companion
    1-step replay buffer), or an empty tuple while the n-step window is
    still filling.
    """
    self.n_step_buffer.append(transition)

    # Wait until a full n-step window has accumulated.
    if len(self.n_step_buffer) < self.n_step:
        return ()

    # First write: size the storage arrays from this sample.
    if self.length == 0:
        first_state, first_action = transition[:2]
        self._initialize_buffers(first_state, first_action)

    # Collapse the window into a single n-step transition.
    n_reward, n_next_state, n_done = get_n_step_info(self.n_step_buffer, self.gamma)
    head_state, head_action = self.n_step_buffer[0][:2]

    cursor = self.idx
    self.obs_buf[cursor] = head_state
    self.acts_buf[cursor] = head_action
    self.rews_buf[cursor] = n_reward
    self.next_obs_buf[cursor] = n_next_state
    self.done_buf[cursor] = n_done

    # Advance the write cursor; on wrap-around, restart at demo_size so
    # the leading demo slots are skipped.
    cursor += 1
    self.idx = self.demo_size if cursor % self.buffer_size == 0 else cursor
    self.length = min(self.length + 1, self.buffer_size)

    # Hand back the oldest raw transition for the single-step buffer.
    return self.n_step_buffer[0]
def check_case2(maxlen: int):
    """Verify n-step info when no terminal occurs within the n-step window."""
    terminal_at = maxlen  # done index beyond the buffer => never terminal
    buffer = generate_dummy_buffer(maxlen, terminal_at)
    reward, next_state, _ = get_n_step_info(buffer, gamma=1)
    # With gamma=1 the reward accumulates 0 + 1 + ... + (maxlen - 1).
    assert 2 * reward == maxlen * (maxlen - 1)
    assert next_state == maxlen
def check_case3(maxlen: int):
    """Verify n-step info when a terminal state lies inside the n-step window."""
    terminal_at = random.randint(1, maxlen - 1)
    buffer = generate_dummy_buffer(maxlen, terminal_at)
    reward, next_state, _ = get_n_step_info(buffer, gamma=1)
    # With gamma=1 the reward accumulates 0 + 1 + ... + terminal_at,
    # truncated at the terminal transition.
    assert 2 * reward == terminal_at * (terminal_at + 1)
    assert next_state == terminal_at + 1
def check_case1(maxlen: int):
    """Verify n-step info when the very first transition is terminal."""
    terminal_at = 0
    buffer = generate_dummy_buffer(maxlen, terminal_at)
    reward, next_state, _ = get_n_step_info(buffer, gamma=1)
    # Truncated immediately: only the first (zero) reward contributes.
    assert reward == terminal_at
    assert next_state == terminal_at + 1
def add(
    self,
    transition: Tuple[np.ndarray, np.ndarray, torch.Tensor, float, np.ndarray, bool],
) -> Tuple[Any, ...]:
    """Store one transition; flush the local sequence buffer when full.

    Storage arrays are lazily allocated from the first transition.
    Transitions accumulate in per-sequence local buffers; once a full
    sequence is collected it is copied into the global episode buffers,
    keeping an overlap_size tail for the next sequence. Returns the
    oldest single-step transition (for a companion 1-step replay
    buffer), or an empty tuple while the n-step window is still filling.
    """
    self.n_step_buffer.append(transition)

    # Wait until a full n-step window has accumulated.
    if len(self.n_step_buffer) < self.n_step:
        return ()

    # First write: size the storage arrays from this sample.
    if self.length == 0 and self.idx == 0:
        first_state, first_action, first_hidden = transition[:3]
        self._initialize_buffers(first_state, first_action, first_hidden)

    # Collapse the window into a single n-step transition.
    n_reward, _, n_done = get_n_step_info(self.n_step_buffer, self.gamma)
    head_state, head_action, head_hidden = self.n_step_buffer[0][:3]

    self.local_obs_buf[self.idx] = head_state
    self.local_acts_buf[self.idx] = head_action
    self.local_rews_buf[self.idx] = n_reward
    self.local_hiddens_buf[self.idx] = head_hidden
    self.local_done_buf[self.idx] = n_done
    self.idx += 1

    # NOTE(review): an early terminal jumps the cursor to sequence_size,
    # which forces the partial sequence to flush below — confirm the
    # stale tail of the local buffers is acceptable as padding.
    if n_done and self.idx < self.sequence_size:
        self.idx = self.sequence_size

    # A complete sequence: copy it into the global episode buffers.
    if self.idx % self.sequence_size == 0:
        episode = self.episode_idx
        self.obs_buf[episode] = self.local_obs_buf
        self.acts_buf[episode] = self.local_acts_buf
        self.rews_buf[episode] = self.local_rews_buf
        self.hiddens_buf[episode] = self.local_hiddens_buf
        self.done_buf[episode] = self.local_done_buf

        # Keep the last overlap_size steps as the start of the next
        # sequence, then advance to the next episode slot.
        self.idx = self.overlap_size
        self.episode_idx += 1
        self._overlap_local_buffers()

    # Wrap the episode index once the global buffer is full.
    if self.episode_idx % self.max_len == 0:
        self.episode_idx = 0
    self.length = min(self.length + 1, self.max_len)

    # Hand back the oldest raw transition for the single-step buffer.
    return self.n_step_buffer[0]