def append_samples(self, samples):
    """Write ``samples`` into the buffer and advance the time cursor.

    The first ``self.fix_T`` time steps are treated as a protected prefix:
    whenever the write reaches the end of the buffer, it continues at
    ``self.fix_T`` instead of at 0.  The wrap boundary does not need to
    align with the length of ``samples``.  After writing,
    ``self.compute_returns(T)`` is called for the newly written steps.

    Returns:
        ``(T, idxs)`` where ``T`` is the number of time steps written and
        ``idxs`` is the slice (no wrap) or integer index array (wrap) of the
        buffer rows that were written; passed on to subclasses.
    """
    T, B = get_leading_dims(samples, n_dim=2)  # samples.env.reward.shape[:2]
    assert B == self.B
    t = self.t
    end = t + T
    if end > self.T:
        # Wrap: fill up to the end, then continue just above the fixed prefix.
        num_miss = end - self.T
        idxs = np.concatenate((
            np.arange(t, self.T),
            np.arange(self.fix_T, self.fix_T + num_miss),
        ))
    else:
        idxs = slice(t, end)
    self.samples[idxs] = samples
    self.compute_returns(T)
    reached_end = end >= self.T
    if not self._buffer_full and reached_end:
        self._buffer_full = True  # Only changes on first time around.
    # The offset must also apply when the write ends exactly on the buffer
    # boundary (end == self.T); otherwise the cursor would reset to 0 and the
    # next append would overwrite the fixed prefix.
    t_adv = self.fix_T if reached_end else 0
    self.t = end % self.T + t_adv
    return T, idxs  # Pass these on to subclass.
def append_samples(self, samples):
    """Append ``samples`` at the cursor, saving the replay each time it fills.

    Variant of the n-step-return buffer append that calls
    ``self.save_replay_buffer()`` at the moment the buffer becomes full:
    when the write would wrap, the tail is written first, the buffer is
    saved, and only then is the head overwritten.

    Returns:
        ``(T, idxs)``: number of time steps written and the buffer indices
        used (a slice, or an integer array when the write wrapped).
    """
    n_steps, n_envs = get_leading_dims(samples, n_dim=2)
    assert n_envs == self.B
    start = self.t
    stop = start + n_steps
    overflow = stop - self.T  # > 0: wraps; == 0: fills exactly; < 0: fits.
    if overflow > 0:
        # Fill the remaining tail, save while the buffer is full, then wrap.
        self.samples[start:self.T] = samples[:-overflow]
        self.save_replay_buffer()
        self.samples[0:overflow] = samples[-overflow:]
        idxs = np.arange(start, stop) % self.T  # for subclasses
    else:
        idxs = slice(start, stop)
        self.samples[idxs] = samples
        if overflow == 0:
            # Buffer just became full: write it to disk after this append.
            self.save_replay_buffer()
    self.compute_returns(n_steps)
    if stop >= self.T and not self._buffer_full:
        self._buffer_full = True
    self.t = stop % self.T
    return n_steps, idxs
def __init__(self, total_n_samples, example_samples):
    """Set up an empty circular buffer of observation/action pairs.

    Args:
        total_n_samples: capacity of the buffer, in individual samples.
        example_samples: sampler output whose ``env.observation`` and
            ``agent.action`` fields serve as the allocation template.  Its
            two leading dimensions must hold at least one and at most
            ``total_n_samples`` entries in total.
    """
    self.total_n_samples = total_n_samples
    template = DiscrimReplaySamples(
        all_observation=example_samples.env.observation,
        all_action=example_samples.agent.action,
    )
    n_steps, n_envs = get_leading_dims(template, n_dim=2)
    n_example = n_steps * n_envs
    assert total_n_samples >= n_example > 0, (total_n_samples, n_example)
    # One element (both leading dims stripped) is the per-sample example;
    # the buffer has a single leading dimension of size total_n_samples.
    single_sample = template[0, 0]
    self.circ_buf = buffer_from_example(single_sample, (total_n_samples, ))
    self.samples_in_buffer = 0  # number of valid entries currently stored
    self.ptr = 0  # next write position in circ_buf
def append_samples(self, samples):
    """Write ``samples`` at the time cursor, wrapping around the buffer end.

    Returns:
        ``(T, idxs)``: number of time steps written and the buffer indices
        used (a slice, or a modulo-wrapped integer array).
    """
    n_steps, n_envs = get_leading_dims(samples, n_dim=2)
    assert n_envs == self.B
    start = self.t
    stop = start + n_steps
    # A slice suffices unless the write runs past the end of the buffer.
    idxs = (np.arange(start, stop) % self.T if stop > self.T
            else slice(start, stop))
    self.samples[idxs] = samples
    if stop >= self.T and not self._buffer_full:
        self._buffer_full = True
    self.t = stop % self.T
    return n_steps, idxs
def append_samples(self, samples):
    """Record rewards/dones, update priorities, then append to the buffer.

    The write position is read from ``self.replay_buffer`` before that
    buffer is advanced.  ``samples.reward`` and ``samples.done`` are stored
    at the same indices, new priorities are computed from
    ``self.compute_ul_returns(T)`` and pushed into ``self.priority_tree``,
    and finally the samples are forwarded to the wrapped buffer.

    Returns:
        Whatever ``self.replay_buffer.append_samples(samples)`` returns.
    """
    n_steps, n_envs = get_leading_dims(samples, n_dim=2)
    assert n_envs == self.replay_buffer.B
    start = self.replay_buffer.t
    stop = start + n_steps
    if stop <= self.replay_buffer.T:
        idxs = slice(start, stop)
    else:
        # Wrap.
        # NOTE(review): the wrap test uses self.replay_buffer.T but the
        # modulo uses self.T -- confirm both always hold the same value.
        idxs = np.arange(start, stop) % self.T
    self.samples_reward[idxs] = samples.reward
    self.samples_done[idxs] = samples.done
    fresh_returns = self.compute_ul_returns(n_steps)
    new_priorities = 1 + self.alpha * fresh_returns**self.beta
    self.priority_tree.advance(n_steps, priorities=new_priorities)
    return self.replay_buffer.append_samples(samples)
def append_samples(self, samples):
    """Store ``samples`` at the cursor and compute returns for the new steps.

    The write wraps around the end of the buffer when needed, and
    ``self.compute_returns(T)`` is called after the data is written.

    Returns:
        ``(T, idxs)`` for use by subclasses: number of time steps written
        and the buffer indices they occupy.
    """
    T, B = get_leading_dims(samples, n_dim=2)  # samples.env.reward.shape[:2]
    assert B == self.B
    first = self.t
    last = first + T
    if last <= self.T:
        idxs = slice(first, last)
    else:
        # Write runs past the end: wrap indices back to the start.
        idxs = np.arange(first, last) % self.T
    self.samples[idxs] = samples
    self.compute_returns(T)
    if last >= self.T and not self._buffer_full:
        self._buffer_full = True  # Flips once, on the first time around.
    self.t = last % self.T
    return T, idxs
def append_samples(self, samples):
    """Store ``samples``, compute their returns and set initial priorities.

    Priorities for the new steps are ``1 + alpha * returns ** beta``, using
    the value returned by ``self.compute_returns(T)``, and are pushed into
    ``self.priority_tree`` after the cursor has been advanced.

    Returns:
        ``(T, idxs)``: number of time steps written and the buffer indices
        used (a slice, or a modulo-wrapped integer array).
    """
    n_steps, n_envs = get_leading_dims(samples, n_dim=2)
    assert n_envs == self.B
    cursor = self.t
    end = cursor + n_steps
    if end <= self.T:
        idxs = slice(cursor, end)
    else:
        idxs = np.arange(cursor, end) % self.T  # Wrap.
    self.samples[idxs] = samples
    fresh_returns = self.compute_returns(n_steps)
    if end >= self.T and not self._buffer_full:
        self._buffer_full = True
    self.t = end % self.T
    self.priority_tree.advance(
        n_steps,
        priorities=1 + self.alpha * fresh_returns**self.beta,
    )
    return n_steps, idxs
def append_samples(self, samples):
    """Copy ``samples`` into the buffer and move the time cursor forward.

    The cursor wraps around the end of the buffer when necessary; the wrap
    boundary need not align with the length of ``samples``.  Once the data
    is in place, ``self.compute_returns(T)`` is called.

    Returns:
        ``(T, idxs)``, handed on to subclasses: the number of time steps
        written and the indices they were written to.
    """
    n_steps, n_envs = get_leading_dims(samples, n_dim=2)
    assert n_envs == self.B
    begin = self.t
    finish = begin + n_steps
    wraps = finish > self.T
    idxs = np.arange(begin, finish) % self.T if wraps else slice(begin, finish)
    # Item assignment copies the values rather than keeping a reference.
    self.samples[idxs] = samples
    self.compute_returns(n_steps)
    if finish >= self.T and not self._buffer_full:
        self._buffer_full = True  # Only changes on the first time around.
    self.t = finish % self.T
    return n_steps, idxs
def __init__(self, example, **kwargs):
    """Build the buffer with observations stored as individual frames.

    All fields of ``example`` except ``observation`` are passed to the
    parent constructor through a ``BufferSamples`` namedarraytuple, which is
    (re)defined at module level.  Observations get a separate frame buffer.

    Args:
        example: example sample; must have an ``observation`` field whose
            leading dimension is the number of stacked frames.
        **kwargs: forwarded unchanged to the parent ``__init__``.
    """
    global BufferSamples
    kept_fields = [name for name in example._fields if name != "observation"]
    BufferSamples = namedarraytuple("BufferSamples", kept_fields)
    kept_values = (
        value for key, value in example.items() if key != "observation")
    super().__init__(example=BufferSamples(*kept_values), **kwargs)
    # Equivalent to image.shape[0] if observation is image array (C,H,W):
    n_frames = get_leading_dims(example.observation, n_dim=1)[0]
    self.n_frames = n_frames
    logger.log(f"Frame-based buffer using {n_frames}-frame sequences.")
    extra = n_frames - 1
    # frames: oldest stored at t; `extra` frames duplicated at beginning & end.
    self.samples_frames = buffer_from_example(
        example.observation[0],
        (self.T + extra, self.B),
        share_memory=self.async_,
    )  # [T+n_frames-1,B,H,W]
    # new_frames: shifted so newest stored at t; no duplication.
    self.samples_new_frames = self.samples_frames[extra:]  # [T,B,H,W]
    self.off_forward = max(self.off_forward, extra)
def append_samples(self, samples):
    """Append whole sequences along the batch axis under the write lock.

    Each of the ``B`` columns of ``samples`` is stored as one sequence at
    positions ``self.t`` onward.  If fewer than ``B`` slots remain before
    ``self.buffer_size``, only the first columns that fit are stored and the
    rest are dropped.  ``prev_rnn_state`` is kept separately, taken from
    time step 0 only.
    """
    with self.rw_lock.write_lock:
        self._async_pull()  # Updates from other writers.
        _, n_seq = get_leading_dims(samples, n_dim=2)
        start = self.t
        # Never write past the end of the buffer: clip to the free slots.
        n_seq = min(n_seq, self.buffer_size - start)
        stop = start + n_seq
        self.samples_prev_rnn_state[np.arange(start, stop)] = (
            samples.prev_rnn_state[0, :n_seq])
        kept = (
            value[:, :n_seq]
            for key, value in samples.items()
            if key != "prev_rnn_state"
        )
        self.samples[:, start:stop] = self.SamplesToBuffer(*kept)
        self._buffer_full = self._buffer_full or stop == self.buffer_size
        self.t = stop % self.buffer_size
        self._async_push()  # Updates to other writers + readers.
def append_samples(self, samples):
    """Drop steps that directly follow a ``done`` and append the rest.

    A time step is treated as invalid when the preceding step in
    ``samples`` has ``done`` set; the first step is always kept.  The
    remaining steps are written at the time cursor, wrapping around the end
    of the buffer if necessary.  No returns are computed here.

    Assumes ``samples.done`` is a torch tensor with one entry per time step
    (i.e. a batch dimension of size 1) -- TODO confirm against callers.

    Returns:
        ``(T, idxs)`` for subclasses: the number of valid time steps written
        and the buffer indices they occupy.
    """
    # Flatten to 1-D explicitly.  A bare ``squeeze()`` would produce a 0-d
    # tensor when there is a single time step, and ``after_done[0]`` would
    # then fail.  ``roll`` returns a new tensor, so ``samples.done`` itself
    # is not modified by the assignment below.
    after_done = samples.done.reshape(-1).roll(1)
    # The roll wraps the last element to the front; the first step is valid.
    after_done[0] = False
    # ``nonzero`` yields an (N, 1) tensor here.  Squeeze only the last dim
    # so the index stays 1-D even when N == 1; a 0-d index would drop the
    # time dimension of ``samples``.  ``== False`` is kept (rather than
    # ``~``) because the dtype of ``done`` is not known to be bool.
    valid_idxs = (after_done == False).nonzero().squeeze(-1)  # noqa: E712
    samples = samples[valid_idxs]
    T, B = get_leading_dims(samples, n_dim=2)  # samples.env.reward.shape[:2]
    assert B == self.B
    t = self.t
    end = t + T
    if end > self.T:  # Wrap.
        idxs = np.arange(t, end) % self.T
    else:
        idxs = slice(t, end)
    self.samples[idxs] = samples
    if not self._buffer_full and end >= self.T:
        self._buffer_full = True  # Only changes on first time around.
    self.t = end % self.T
    return T, idxs  # Pass these on to subclass.
def append_samples(self, samples):
    """Append non-observation fields normally; store only the newest frame.

    Everything except ``observation`` goes through the parent
    ``append_samples``.  For observations, only the last frame of each step
    is written into ``self.samples_new_frames``.  When the write wraps, the
    tail frames are stored before the parent call (which, per the original
    notes, is where the replay gets saved) and the head frames after it.

    Returns:
        ``(T, idxs)``: number of time steps written and the buffer indices
        used (a slice without wrap, otherwise what the parent returned).
    """
    start = self.t
    n_dup = self.n_frames - 1
    non_obs = (
        value for key, value in samples.items() if key != "observation")
    buffer_samples = BufferSamples(*non_obs)
    if start == 0:
        # Starting: write the early frames of the very first observation.
        for frame in range(n_dup):
            self.samples_frames[frame] = samples.observation[0, :, frame]
    n_steps, _ = get_leading_dims(samples, n_dim=2)
    stop = start + n_steps
    if stop <= self.T:
        idxs = slice(start, stop)
        self.samples_new_frames[idxs] = samples.observation[:, :, -1]
        # May still save the replay if the new cursor is 0.
        super().append_samples(buffer_samples)
        return n_steps, idxs
    # Wrap: store the tail frames before the parent call saves the buffer.
    overflow = stop - self.T
    self.samples_new_frames[start:self.T] = (
        samples.observation[:-overflow, :, -1])
    _, idxs = super().append_samples(buffer_samples)  # idxs for subclasses
    self.samples_new_frames[0:overflow] = (
        samples.observation[-overflow:, :, -1])
    if n_dup > 0:
        # Copy the duplicated frames from the end back to the beginning.
        self.samples_frames[:n_dup] = self.samples_frames[-n_dup:]
    return n_steps, idxs
def append_samples(self, samples):
    """Append samples drawn from a sampler to the circular buffer.

    ``samples`` should be a namedarraytuple with leading dimensions
    ``(time_steps, batch_size)``.  Its observations and actions are
    flattened over those two dimensions and copied in at ``self.ptr``,
    wrapping to the start of the buffer when the end is reached.
    """
    replay_samples = DiscrimReplaySamples(
        all_observation=samples.env.observation,
        all_action=samples.agent.action,
    )
    T, B = get_leading_dims(replay_samples, n_dim=2)
    n_new = T * B
    capacity = self.total_n_samples
    # If one full round of sampling does not fit, the buffer is probably
    # too small.
    assert n_new <= capacity, \
        f"There's not enough room in this buffer for a single full " \
        f"batch! T*B={T*B} > total_n_samples={self.total_n_samples}"
    flat_samples = buffer_func(
        replay_samples,
        lambda arr: arr.reshape((n_new, ) + arr.shape[2:]),
    )
    src = 0
    while src < n_new:
        dst = self.ptr
        # Copy at most up to the end of the buffer; the loop handles the rest.
        chunk = min(n_new - src, capacity - dst)
        self.circ_buf[dst:dst + chunk] = flat_samples[src:src + chunk]
        src += chunk
        self.ptr = (dst + chunk) % capacity
        self.samples_in_buffer = min(capacity, self.samples_in_buffer + chunk)