Example #1
 def space_sample(self):
     '''
     Samples a batch from memory.
     Note that for multitask, the bodies are parallelized copies of similar envs, used only to increase the batch size
     '''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(
                 body_batch['actions'], body.action_space.high)
         if self.normalize_state:
             body_batch = policy_util.normalize_states_and_next_states(
                 body, body_batch)
         body_batch = util.to_torch_batch(body_batch, self.net.device,
                                          body.memory.is_episodic)
         batches.append(body_batch)
     # Concat states at dim=1 for feedforward
     batch = {
         'states':
         torch.cat([body_batch['states'] for body_batch in batches], dim=1),
         'next_states':
         torch.cat([body_batch['next_states'] for body_batch in batches],
                   dim=1),
     }
     # retain body-batches for body-wise q_targets calc
     batch['body_batches'] = batches
     return batch
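The one-hot conversion above relies on util.to_one_hot, whose body is not shown in these snippets. A minimal sketch of what such a helper could look like, assuming body.action_space.high resolves to the number of discrete actions and `actions` is an integer index array (this is an illustration, not the library's actual implementation):

    import numpy as np

    def to_one_hot_sketch(actions, high):
        # encode each integer action as a one-hot row vector of length `high`
        one_hot = np.zeros((len(actions), high), dtype=np.float32)
        one_hot[np.arange(len(actions)), actions] = 1.0
        return one_hot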
Example #2
 def sample(self):
     '''Samples a batch from memory of size self.memory_spec['batch_size']'''
     batch = self.body.memory.sample()
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
     return batch
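Every variant ends with util.to_torch_batch, which turns the sampled numpy batch into torch tensors on the network's device. A rough stand-in, assuming episodic batches arrive as lists of per-episode arrays that must be flattened before conversion (illustrative only; the real helper's behavior may differ):

    import numpy as np
    import torch

    def to_torch_batch_sketch(batch, device, is_episodic):
        out = {}
        for k, v in batch.items():
            if is_episodic:
                # episodic memories store one array per episode; flatten first
                v = np.concatenate(v)
            out[k] = torch.as_tensor(np.asarray(v), dtype=torch.float32, device=device)
        return out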
Example #3
File: sil.py  Project: wilson1yan/SLM-Lab
 def replay_sample(self):
     '''Samples a batch from memory'''
     batch = self.body.replay_memory.sample()
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(
             self.body, batch, episodic_flag=self.body.replay_memory.is_episodic)
     batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
     assert not torch.isnan(batch['states']).any(), batch['states']
     return batch
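policy_util.normalize_states_and_next_states appears in every example but its internals are not shown here. One plausible reading is z-score normalization of 'states' and 'next_states' using running statistics tracked on the body; the sketch below, including the state_mean/state_std attribute names, is an assumption rather than the library's API, and it ignores the episodic_flag case from example #3:

    import numpy as np

    def normalize_states_sketch(body, batch):
        # assumed: body tracks running mean/std of observed states
        for k in ('states', 'next_states'):
            if k in batch:
                batch[k] = (batch[k] - body.state_mean) / (body.state_std + 1e-8)
        return batch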
Example #4
 def sample(self):
     '''Samples a batch from memory of size self.memory_spec['batch_size']'''
     batch = self.body.memory.sample()
     # one-hot actions to calc q_targets
     if self.body.is_discrete:
         batch['actions'] = util.to_one_hot(batch['actions'], self.body.action_space.high)
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
     return batch
Example #5
 def space_sample(self):
     '''Samples a batch per body, each of which may experience a different environment'''
     batch = {k: [] for k in self.body.memory.data_keys}
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         if self.normalize_state:
             body_batch = policy_util.normalize_states_and_next_states(body, body_batch)
         body_batch = util.to_torch_batch(body_batch, self.net.device, body.memory.is_episodic)
         for k, arr in batch.items():
             arr.append(body_batch[k])
     return batch
Example #6
File: sil.py  Project: wilson1yan/SLM-Lab
 def sample(self):
     '''Modify the onpolicy sample to also append to replay'''
     batch = self.body.memory.sample()
     batch = {k: np.concatenate(v) for k, v in batch.items()}  # concat episodic memory
     batch['rets'] = math_util.calc_returns(batch, self.gamma)
     for idx in range(len(batch['dones'])):
         tuples = [batch[k][idx] for k in self.body.replay_memory.data_keys]
         self.body.replay_memory.add_experience(*tuples)
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
     return batch
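Example #6 computes Monte-Carlo returns with math_util.calc_returns before copying the transitions into the replay memory. The standard computation this presumably performs is the backward recursion G_t = r_t + gamma * (1 - done_t) * G_{t+1}; a minimal sketch, where only the call signature is taken from the example and the internals are an assumption:

    import numpy as np

    def calc_returns_sketch(batch, gamma):
        rewards, dones = batch['rewards'], batch['dones']
        rets = np.zeros(len(rewards), dtype=np.float32)
        future_ret = 0.0
        # iterate backwards so each return reuses the return of the next step,
        # resetting at episode boundaries via (1 - done)
        for t in reversed(range(len(rewards))):
            future_ret = rewards[t] + gamma * (1.0 - dones[t]) * future_ret
            rets[t] = future_ret
        return rets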
Example #7
 def sample(self):
     '''Samples a batch from memory'''
     batch = self.body.memory.sample()
     # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
     batch['next_actions'] = np.zeros_like(batch['actions'])
     batch['next_actions'][:-1] = batch['actions'][1:]
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(
             self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device,
                                 self.body.memory.is_episodic)
     return batch
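The next_actions construction in example #7 simply shifts the action sequence by one step, so each transition is paired with the action taken at the following state. A small standalone illustration with made-up values:

    import numpy as np

    actions = np.array([2, 0, 1, 3])
    next_actions = np.zeros_like(actions)
    next_actions[:-1] = actions[1:]
    # next_actions is now [0, 1, 3, 0]; the trailing 0 is a dummy value that is
    # safe because the matching act_next_q_preds gets multiplied by (1 - done)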
Example #8
 def space_sample(self):
     '''Samples a batch per body, each of which may experience a different environment'''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(
                 body_batch['actions'], body.action_space.high)
         if self.normalize_state:
             body_batch = policy_util.normalize_states_and_next_states(
                 body, body_batch)
         body_batch = util.to_torch_batch(body_batch, self.net.device,
                                          body.memory.is_episodic)
         batches.append(body_batch)
     # collect per body for feedforward to hydra heads
     batch = {
         'states': [body_batch['states'] for body_batch in batches],
         'next_states':
         [body_batch['next_states'] for body_batch in batches],
     }
     # retain body-batches for body-wise q_targets calc
     batch['body_batches'] = batches
     return batch
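Example #8 keeps the per-body states as a list because the downstream "hydra" network has one head per body. A rough sketch of how such a multi-head forward pass could consume that list; this module and its structure are an assumption for illustration, not SLM-Lab's actual hydra net:

    import torch
    import torch.nn as nn

    class HydraHeadsSketch(nn.Module):
        def __init__(self, state_dims, hidden_dim):
            super().__init__()
            # one input head per body, each with its own state dimension
            self.heads = nn.ModuleList([nn.Linear(d, hidden_dim) for d in state_dims])

        def forward(self, states_list):
            # states_list[i] is the state tensor sampled from body i
            return [torch.relu(head(s)) for head, s in zip(self.heads, states_list)]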