Example #1
 def space_sample(self):
     '''
     Samples a batch from memory.
     Note that for multitask, the bodies are parallelized copies of similar envs, used only to increase the batch size
     '''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(
                 body_batch['actions'], body.action_space.high)
         if self.normalize_state:
             body_batch = policy_util.normalize_states_and_next_states(
                 body, body_batch)
         body_batch = util.to_torch_batch(body_batch, self.net.device,
                                          body.memory.is_episodic)
         batches.append(body_batch)
     # Concat states at dim=1 for feedforward
     batch = {
         'states':
         torch.cat([body_batch['states'] for body_batch in batches], dim=1),
         'next_states':
         torch.cat([body_batch['next_states'] for body_batch in batches],
                   dim=1),
     }
     # retain body-batches for body-wise q_targets calc
     batch['body_batches'] = batches
     return batch
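The one-hot conversion above relies on util.to_one_hot, whose body is not shown in these snippets. A minimal sketch of what such a helper could look like, assuming body.action_space.high resolves to the number of discrete actions and `actions` is an integer index array (this is an illustration, not the library's actual implementation):

    import numpy as np

    def to_one_hot_sketch(actions, high):
        # encode each integer action as a one-hot row vector of length `high`
        one_hot = np.zeros((len(actions), high), dtype=np.float32)
        one_hot[np.arange(len(actions)), actions] = 1.0
        return one_hot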
Example #2
 def sample(self):
     '''Samples a batch from memory of size self.memory_spec['batch_size']'''
     batch = self.body.memory.sample()
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
     return batch
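Every variant ends with util.to_torch_batch, which turns the sampled numpy batch into torch tensors on the network's device. A rough stand-in, assuming episodic batches arrive as lists of per-episode arrays that must be flattened before conversion (illustrative only; the real helper's behavior may differ):

    import numpy as np
    import torch

    def to_torch_batch_sketch(batch, device, is_episodic):
        out = {}
        for k, v in batch.items():
            if is_episodic:
                # episodic memories store one array per episode; flatten first
                v = np.concatenate(v)
            out[k] = torch.as_tensor(np.asarray(v), dtype=torch.float32, device=device)
        return out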
Example #3
File: sil.py  Project: wilson1yan/SLM-Lab
 def replay_sample(self):
     '''Samples a batch from memory'''
     batch = self.body.replay_memory.sample()
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(
             self.body, batch, episodic_flag=self.body.replay_memory.is_episodic)
     batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
     assert not torch.isnan(batch['states']).any(), batch['states']
     return batch
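policy_util.normalize_states_and_next_states appears in every example but its internals are not shown here. One plausible reading is z-score normalization of 'states' and 'next_states' using running statistics tracked on the body; the sketch below, including the state_mean/state_std attribute names, is an assumption rather than the library's API, and it ignores the episodic_flag case from example #3:

    import numpy as np

    def normalize_states_sketch(body, batch):
        # assumed: body tracks running mean/std of observed states
        for k in ('states', 'next_states'):
            if k in batch:
                batch[k] = (batch[k] - body.state_mean) / (body.state_std + 1e-8)
        return batch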
Example #4
 def sample(self):
     '''Samples a batch from memory of size self.memory_spec['batch_size']'''
     batch = self.body.memory.sample()
     # one-hot actions to calc q_targets
     if self.body.is_discrete:
         batch['actions'] = util.to_one_hot(batch['actions'], self.body.action_space.high)
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
     return batch
Example #5
 def space_sample(self):
     '''Samples a batch per body, each of which may experience a different environment'''
     batch = {k: [] for k in self.body.memory.data_keys}
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         if self.normalize_state:
             body_batch = policy_util.normalize_states_and_next_states(body, body_batch)
         body_batch = util.to_torch_batch(body_batch, self.net.device, body.memory.is_episodic)
         for k, arr in batch.items():
             arr.append(body_batch[k])
     return batch
Example #6
File: sil.py  Project: wilson1yan/SLM-Lab
 def sample(self):
     '''Modify the onpolicy sample to also append to replay'''
     batch = self.body.memory.sample()
     batch = {k: np.concatenate(v) for k, v in batch.items()}  # concat episodic memory
     batch['rets'] = math_util.calc_returns(batch, self.gamma)
     for idx in range(len(batch['dones'])):
         tuples = [batch[k][idx] for k in self.body.replay_memory.data_keys]
         self.body.replay_memory.add_experience(*tuples)
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
     return batch
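Example #6 computes Monte-Carlo returns with math_util.calc_returns before copying the transitions into the replay memory. The standard computation this presumably performs is the backward recursion G_t = r_t + gamma * (1 - done_t) * G_{t+1}; a minimal sketch, where only the call signature is taken from the example and the internals are an assumption:

    import numpy as np

    def calc_returns_sketch(batch, gamma):
        rewards, dones = batch['rewards'], batch['dones']
        rets = np.zeros(len(rewards), dtype=np.float32)
        future_ret = 0.0
        # iterate backwards so each return reuses the return of the next step,
        # resetting at episode boundaries via (1 - done)
        for t in reversed(range(len(rewards))):
            future_ret = rewards[t] + gamma * (1.0 - dones[t]) * future_ret
            rets[t] = future_ret
        return rets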
Example #7
 def sample(self):
     '''Samples a batch from memory'''
     batch = self.body.memory.sample()
     # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
     batch['next_actions'] = np.zeros_like(batch['actions'])
     batch['next_actions'][:-1] = batch['actions'][1:]
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(
             self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device,
                                 self.body.memory.is_episodic)
     return batch
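The next_actions construction in example #7 simply shifts the action sequence by one step, so each transition is paired with the action taken at the following state. A small standalone illustration with made-up values:

    import numpy as np

    actions = np.array([2, 0, 1, 3])
    next_actions = np.zeros_like(actions)
    next_actions[:-1] = actions[1:]
    # next_actions is now [0, 1, 3, 0]; the trailing 0 is a dummy value that is
    # safe because the matching act_next_q_preds gets multiplied by (1 - done)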
Example #8
 def space_sample(self):
     '''Samples a batch per body, each of which may experience a different environment'''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(
                 body_batch['actions'], body.action_space.high)
         if self.normalize_state:
             body_batch = policy_util.normalize_states_and_next_states(
                 body, body_batch)
         body_batch = util.to_torch_batch(body_batch, self.net.device,
                                          body.memory.is_episodic)
         batches.append(body_batch)
     # collect per body for feedforward to hydra heads
     batch = {
         'states': [body_batch['states'] for body_batch in batches],
         'next_states':
         [body_batch['next_states'] for body_batch in batches],
     }
     # retain body-batches for body-wise q_targets calc
     batch['body_batches'] = batches
     return batch
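Example #8 keeps the per-body states as a list because the downstream "hydra" network has one head per body. A rough sketch of how such a multi-head forward pass could consume that list; this module and its structure are an assumption for illustration, not SLM-Lab's actual hydra net:

    import torch
    import torch.nn as nn

    class HydraHeadsSketch(nn.Module):
        def __init__(self, state_dims, hidden_dim):
            super().__init__()
            # one input head per body, each with its own state dimension
            self.heads = nn.ModuleList([nn.Linear(d, hidden_dim) for d in state_dims])

        def forward(self, states_list):
            # states_list[i] is the state tensor sampled from body i
            return [torch.relu(head(s)) for head, s in zip(self.heads, states_list)]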