def _update_random_action(spec, noisy_action):
    """Replace a random subset of ``noisy_action`` entries with uniform noise.

    Every index along the first dimension of ``noisy_action`` is selected
    independently when a uniform sample in ``[0, 1)`` falls below
    ``self._rollout_random_action`` (``self`` comes from the enclosing
    scope). Selected entries are overwritten **in place** with samples drawn
    uniformly from ``[-1, 1)`` and passed through
    ``spec_utils.scale_to_spec``.

    Args:
        spec: the spec handed to ``spec_utils.scale_to_spec`` for scaling the
            random samples.
        noisy_action (torch.Tensor): the tensor to modify in place. Its
            first dimension is presumably the batch dimension -- confirm
            against the caller.

    Returns:
        None. ``noisy_action`` is mutated.
    """
    random_action = spec_utils.scale_to_spec(
        torch.rand_like(noisy_action) * 2 - 1, spec)
    # Draw the selection mask on the same device as ``noisy_action`` so that
    # indexing never mixes devices, whatever the current default device is.
    replace = torch.rand(
        noisy_action.shape[:1],
        device=noisy_action.device) < self._rollout_random_action
    # Boolean masking over the first dimension selects whole rows, which is
    # what ``[ind[0], :]`` did, and it also works for 1-D tensors.
    noisy_action[replace] = random_action[replace]
def forward(self, observation, state=()):
    """Compute the actions for the given observation.

    The observation is first passed through the parent class ``forward()``
    and then through the encoding network. Each action layer is applied to
    the encoding; its output is squashed by ``self._squashing_func`` and
    scaled to the corresponding flat action spec. When output summarization
    is enabled, the mean norm of every pre-activation and every action is
    written as a scalar summary.

    Args:
        observation: the network input (the parameter the original docstring
            referred to as ``inputs``, consistent with ``input_tensor_spec``).
        state: empty; it only exists to keep the API consistent with
            ``ActorRNNNetwork``.

    Returns:
        tuple:
        - action: the per-spec actions packed with the structure of
          ``self._action_spec``
        - state: the state returned by the parent ``forward()`` (expected to
          be empty)
    """

    def _summarize_norm(index, infix, tensor):
        # Scalar summary of the mean norm taken over all non-batch dims.
        summary_name = ('summarize_output/' + self.name + '.action_layer.' +
                        str(index) + infix + common.exe_mode_name())
        non_batch_dims = list(range(1, tensor.ndim))
        alf.summary.scalar(
            name=summary_name,
            data=torch.mean(tensor.norm(dim=non_batch_dims)))

    observation, state = super().forward(observation, state)
    encoded_obs, _ = self._encoding_net(observation)

    flat_actions = []
    layer_specs = zip(self._action_layers, self._flat_action_spec)
    for idx, (action_layer, action_spec) in enumerate(layer_specs):
        pre_activation = action_layer(encoded_obs)
        squashed = self._squashing_func(pre_activation)
        scaled_action = spec_utils.scale_to_spec(squashed, action_spec)

        if alf.summary.should_summarize_output():
            _summarize_norm(idx, '.pre_activation.output_norm.',
                            pre_activation)
            _summarize_norm(idx, '.action.output_norm.', scaled_action)

        flat_actions.append(scaled_action)

    return nest.pack_sequence_as(self._action_spec, flat_actions), state
def forward(self, observation, state):
    """Compute the actions for the given observation and recurrent state.

    The observation and state are first passed through the parent class
    ``forward()``, then through the LSTM encoding network, which also yields
    the updated state. Every action layer is applied to the encoding and its
    output is scaled to the corresponding flat action spec.

    Args:
        observation: the network input (the parameter the original docstring
            referred to as ``inputs``, consistent with ``input_tensor_spec``).
        state (nest[tuple]): a nest structure of state tuples ``(h, c)``.

    Returns:
        tuple:
        - action: the per-spec actions packed with the structure of
          ``self._action_spec``
        - new_state (nest[tuple]): the updated states returned by the LSTM
          encoding network
    """
    observation, state = super().forward(observation, state)
    encoded_obs, new_state = self._lstm_encoding_net(observation, state)

    # One scaled action per (layer, spec) pair, in flat-spec order.
    flat_actions = [
        spec_utils.scale_to_spec(action_layer(encoded_obs), action_spec)
        for action_layer, action_spec in zip(self._action_layers,
                                             self._flat_action_spec)
    ]

    return nest.pack_sequence_as(self._action_spec, flat_actions), new_state