def replay_sample(self):
    '''Sample a batch from every body's replay memory and concatenate them into one training batch.'''
    batches = []
    for body in self.agent.nanflat_body_a:
        batches.append(body.replay_memory.sample())
    combined = util.concat_batches(batches)
    combined = util.to_torch_batch(combined, self.net.gpu)
    # guard against corrupted memory contents before training on them
    assert not torch.isnan(combined['states']).any()
    return combined
def replay_sample(self):
    '''Draw one training batch by pooling replay-memory samples across all bodies.'''
    body_samples = [b.replay_memory.sample() for b in self.agent.nanflat_body_a]
    pooled = util.to_torch_batch(util.concat_batches(body_samples), self.net.gpu)
    # sanity check: states must be NaN-free before use
    assert not torch.isnan(pooled['states']).any()
    return pooled
def sample(self):
    '''Modify the onpolicy sample to also append to replay'''
    per_body = [body.memory.sample() for body in self.agent.nanflat_body_a]
    batch = util.concat_batches(per_body)
    # mirror every sampled transition into the replay memory, field by field
    data_keys = self.body.replay_memory.data_keys
    for i in range(len(batch['dones'])):
        experience = [batch[key][i] for key in data_keys]
        self.body.replay_memory.add_experience(*experience)
    return util.to_torch_batch(batch, self.net.gpu)
def space_sample(self):
    '''Samples a batch from memory'''
    bodies = self.agent.nanflat_body_a
    batches = []
    for body in bodies:
        # self.sample() reads self.body, so point it at each body in turn
        self.body = body
        batches.append(self.sample())
    # restore the default body reference after iterating
    self.body = bodies[0]
    merged = util.concat_batches(batches)
    return util.to_torch_batch(merged, self.net.device, self.body.memory.is_episodic)
def sample(self):
    '''Samples a batch from memory of size self.memory_spec['batch_size']'''
    batches = []
    for body in self.agent.nanflat_body_a:
        sampled = body.memory.sample()
        if body.is_discrete:
            # one-hot encode discrete actions to calc q_targets
            sampled['actions'] = util.to_one_hot(sampled['actions'], body.action_space.high)
        batches.append(sampled)
    return util.to_torch_batch(util.concat_batches(batches), self.net.gpu)
def sample(self):
    '''Samples a batch from memory'''
    batches = []
    for body in self.agent.nanflat_body_a:
        body_batch = body.memory.sample()
        if body.is_discrete:
            # keep raw actions; store the one-hot form separately to calc q_targets
            body_batch['one_hot_actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
        batches.append(body_batch)
    batch = util.concat_batches(batches)
    # next_actions = actions shifted left by one; the final slot stays zero.
    # This is safe for next_action at done since the calculated act_next_q_preds
    # will be multiplied by (1 - batch['dones']).
    # NOTE(review): the shift runs over the concatenated batch, so the last row of
    # one body's batch borrows the first action of the next body's batch —
    # presumably masked out by dones at episode boundaries; confirm with memory layout.
    shifted = np.zeros_like(batch['actions'])
    shifted[:-1] = batch['actions'][1:]
    batch['next_actions'] = shifted
    return util.to_torch_batch(batch, self.net.gpu)
def sample(self):
    '''Samples a batch from memory'''
    per_body = []
    for body in self.agent.nanflat_body_a:
        per_body.append(body.memory.sample())
    combined = util.concat_batches(per_body)
    return util.to_torch_batch(combined, self.net.gpu)